{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5392362951724642, "eval_steps": 500, "global_step": 29500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.827919644652421e-05, "grad_norm": 91.8160206307624, "learning_rate": 0.0, "loss": 34.0346, "step": 1 }, { "epoch": 3.655839289304842e-05, "grad_norm": 73.5999744271434, "learning_rate": 6.092916984006093e-09, "loss": 31.4361, "step": 2 }, { "epoch": 5.483758933957263e-05, "grad_norm": 51.695688615050905, "learning_rate": 1.2185833968012185e-08, "loss": 28.6464, "step": 3 }, { "epoch": 7.311678578609684e-05, "grad_norm": 50.539720587475585, "learning_rate": 1.827875095201828e-08, "loss": 27.7592, "step": 4 }, { "epoch": 9.139598223262106e-05, "grad_norm": 33.302774248523406, "learning_rate": 2.437166793602437e-08, "loss": 23.5683, "step": 5 }, { "epoch": 0.00010967517867914526, "grad_norm": 30.74690231214884, "learning_rate": 3.0464584920030467e-08, "loss": 22.8067, "step": 6 }, { "epoch": 0.0001279543751256695, "grad_norm": 23.259204581165505, "learning_rate": 3.655750190403656e-08, "loss": 22.4653, "step": 7 }, { "epoch": 0.0001462335715721937, "grad_norm": 38.374691871614225, "learning_rate": 4.265041888804266e-08, "loss": 21.5733, "step": 8 }, { "epoch": 0.0001645127680187179, "grad_norm": 27.796505852900417, "learning_rate": 4.874333587204874e-08, "loss": 21.1979, "step": 9 }, { "epoch": 0.00018279196446524212, "grad_norm": 24.10366506784843, "learning_rate": 5.483625285605484e-08, "loss": 21.5174, "step": 10 }, { "epoch": 0.00020107116091176632, "grad_norm": 16.145192145772253, "learning_rate": 6.092916984006093e-08, "loss": 20.6094, "step": 11 }, { "epoch": 0.00021935035735829052, "grad_norm": 15.63771708728853, "learning_rate": 6.702208682406704e-08, "loss": 20.3061, "step": 12 }, { "epoch": 0.00023762955380481475, "grad_norm": 14.09407904830339, "learning_rate": 7.311500380807313e-08, "loss": 20.0126, "step": 13 }, { "epoch": 0.000255908750251339, "grad_norm": 11.623036021341093, "learning_rate": 7.920792079207921e-08, "loss": 19.2338, "step": 14 }, { "epoch": 0.0002741879466978632, "grad_norm": 13.989361579308797, "learning_rate": 8.530083777608532e-08, "loss": 19.9629, "step": 15 }, { "epoch": 0.0002924671431443874, "grad_norm": 16.006656703513595, "learning_rate": 9.13937547600914e-08, "loss": 19.8983, "step": 16 }, { "epoch": 0.0003107463395909116, "grad_norm": 12.953914362703909, "learning_rate": 9.748667174409748e-08, "loss": 19.8281, "step": 17 }, { "epoch": 0.0003290255360374358, "grad_norm": 13.199082096754312, "learning_rate": 1.0357958872810359e-07, "loss": 19.4738, "step": 18 }, { "epoch": 0.00034730473248396, "grad_norm": 12.229065836228527, "learning_rate": 1.0967250571210967e-07, "loss": 18.9835, "step": 19 }, { "epoch": 0.00036558392893048424, "grad_norm": 15.118897649083674, "learning_rate": 1.1576542269611578e-07, "loss": 20.3097, "step": 20 }, { "epoch": 0.00038386312537700844, "grad_norm": 12.148133829458713, "learning_rate": 1.2185833968012187e-07, "loss": 19.2054, "step": 21 }, { "epoch": 0.00040214232182353264, "grad_norm": 11.55384831434423, "learning_rate": 1.2795125666412797e-07, "loss": 18.9609, "step": 22 }, { "epoch": 0.00042042151827005684, "grad_norm": 11.852696223714258, "learning_rate": 1.3404417364813407e-07, "loss": 19.2075, "step": 23 }, { "epoch": 0.00043870071471658104, "grad_norm": 12.726817530901487, "learning_rate": 1.4013709063214015e-07, "loss": 19.6638, "step": 24 }, { "epoch": 0.0004569799111631053, "grad_norm": 13.380375455690142, "learning_rate": 1.4623000761614625e-07, "loss": 19.8093, "step": 25 }, { "epoch": 0.0004752591076096295, "grad_norm": 9.618449769800641, "learning_rate": 1.5232292460015233e-07, "loss": 18.6443, "step": 26 }, { "epoch": 0.0004935383040561537, "grad_norm": 9.468083311223333, "learning_rate": 1.5841584158415843e-07, "loss": 19.0952, "step": 27 }, { "epoch": 0.000511817500502678, "grad_norm": 10.742512704213103, "learning_rate": 1.6450875856816453e-07, "loss": 19.2092, "step": 28 }, { "epoch": 0.0005300966969492021, "grad_norm": 9.384042455482811, "learning_rate": 1.7060167555217063e-07, "loss": 18.7381, "step": 29 }, { "epoch": 0.0005483758933957264, "grad_norm": 9.362811579992922, "learning_rate": 1.766945925361767e-07, "loss": 19.0655, "step": 30 }, { "epoch": 0.0005666550898422505, "grad_norm": 9.59019465210035, "learning_rate": 1.827875095201828e-07, "loss": 18.9933, "step": 31 }, { "epoch": 0.0005849342862887748, "grad_norm": 10.416158655166349, "learning_rate": 1.8888042650418892e-07, "loss": 19.3626, "step": 32 }, { "epoch": 0.0006032134827352989, "grad_norm": 9.362295378103198, "learning_rate": 1.9497334348819496e-07, "loss": 18.5162, "step": 33 }, { "epoch": 0.0006214926791818232, "grad_norm": 10.082680254606226, "learning_rate": 2.0106626047220107e-07, "loss": 18.8908, "step": 34 }, { "epoch": 0.0006397718756283474, "grad_norm": 11.776658098765381, "learning_rate": 2.0715917745620717e-07, "loss": 19.1424, "step": 35 }, { "epoch": 0.0006580510720748716, "grad_norm": 10.029439496961013, "learning_rate": 2.1325209444021327e-07, "loss": 19.0282, "step": 36 }, { "epoch": 0.0006763302685213958, "grad_norm": 8.464048988417401, "learning_rate": 2.1934501142421935e-07, "loss": 18.3312, "step": 37 }, { "epoch": 0.00069460946496792, "grad_norm": 10.778425266213326, "learning_rate": 2.2543792840822545e-07, "loss": 19.3784, "step": 38 }, { "epoch": 0.0007128886614144442, "grad_norm": 10.45936367907346, "learning_rate": 2.3153084539223155e-07, "loss": 18.772, "step": 39 }, { "epoch": 0.0007311678578609685, "grad_norm": 10.180810668857037, "learning_rate": 2.3762376237623766e-07, "loss": 19.0677, "step": 40 }, { "epoch": 0.0007494470543074926, "grad_norm": 9.486856492442154, "learning_rate": 2.4371667936024373e-07, "loss": 18.6111, "step": 41 }, { "epoch": 0.0007677262507540169, "grad_norm": 8.892386702121996, "learning_rate": 2.4980959634424986e-07, "loss": 18.8254, "step": 42 }, { "epoch": 0.000786005447200541, "grad_norm": 10.950408492270602, "learning_rate": 2.5590251332825594e-07, "loss": 19.4113, "step": 43 }, { "epoch": 0.0008042846436470653, "grad_norm": 10.6052965565659, "learning_rate": 2.61995430312262e-07, "loss": 19.4145, "step": 44 }, { "epoch": 0.0008225638400935895, "grad_norm": 9.928430603672055, "learning_rate": 2.6808834729626814e-07, "loss": 18.7649, "step": 45 }, { "epoch": 0.0008408430365401137, "grad_norm": 9.414699639763915, "learning_rate": 2.7418126428027417e-07, "loss": 19.1676, "step": 46 }, { "epoch": 0.0008591222329866379, "grad_norm": 10.136690731179094, "learning_rate": 2.802741812642803e-07, "loss": 18.9255, "step": 47 }, { "epoch": 0.0008774014294331621, "grad_norm": 9.41687674012297, "learning_rate": 2.8636709824828637e-07, "loss": 18.5135, "step": 48 }, { "epoch": 0.0008956806258796863, "grad_norm": 10.230953762022938, "learning_rate": 2.924600152322925e-07, "loss": 19.4077, "step": 49 }, { "epoch": 0.0009139598223262106, "grad_norm": 9.331096241633832, "learning_rate": 2.985529322162986e-07, "loss": 18.5752, "step": 50 }, { "epoch": 0.0009322390187727347, "grad_norm": 9.377359156140896, "learning_rate": 3.0464584920030465e-07, "loss": 18.6574, "step": 51 }, { "epoch": 0.000950518215219259, "grad_norm": 10.24702422065715, "learning_rate": 3.107387661843108e-07, "loss": 18.7512, "step": 52 }, { "epoch": 0.0009687974116657831, "grad_norm": 10.438749932564663, "learning_rate": 3.1683168316831686e-07, "loss": 19.1765, "step": 53 }, { "epoch": 0.0009870766081123074, "grad_norm": 9.395189052530064, "learning_rate": 3.2292460015232293e-07, "loss": 18.7879, "step": 54 }, { "epoch": 0.0010053558045588315, "grad_norm": 9.545581422460163, "learning_rate": 3.2901751713632906e-07, "loss": 18.5035, "step": 55 }, { "epoch": 0.001023635001005356, "grad_norm": 8.756905292427446, "learning_rate": 3.3511043412033514e-07, "loss": 18.8401, "step": 56 }, { "epoch": 0.00104191419745188, "grad_norm": 9.835134510268613, "learning_rate": 3.4120335110434127e-07, "loss": 18.6639, "step": 57 }, { "epoch": 0.0010601933938984042, "grad_norm": 9.329687900850928, "learning_rate": 3.472962680883473e-07, "loss": 18.5866, "step": 58 }, { "epoch": 0.0010784725903449283, "grad_norm": 10.385670344311434, "learning_rate": 3.533891850723534e-07, "loss": 19.4529, "step": 59 }, { "epoch": 0.0010967517867914527, "grad_norm": 8.319642063163402, "learning_rate": 3.594821020563595e-07, "loss": 18.2629, "step": 60 }, { "epoch": 0.0011150309832379769, "grad_norm": 10.166549933688021, "learning_rate": 3.655750190403656e-07, "loss": 19.0247, "step": 61 }, { "epoch": 0.001133310179684501, "grad_norm": 10.381261400913276, "learning_rate": 3.716679360243717e-07, "loss": 19.3155, "step": 62 }, { "epoch": 0.0011515893761310254, "grad_norm": 9.779550735131549, "learning_rate": 3.7776085300837783e-07, "loss": 18.7911, "step": 63 }, { "epoch": 0.0011698685725775495, "grad_norm": 8.899746443889216, "learning_rate": 3.838537699923839e-07, "loss": 18.8994, "step": 64 }, { "epoch": 0.0011881477690240737, "grad_norm": 9.321725176330542, "learning_rate": 3.8994668697638993e-07, "loss": 18.5516, "step": 65 }, { "epoch": 0.0012064269654705978, "grad_norm": 10.686659102112074, "learning_rate": 3.9603960396039606e-07, "loss": 19.1882, "step": 66 }, { "epoch": 0.0012247061619171222, "grad_norm": 10.278368781143437, "learning_rate": 4.0213252094440214e-07, "loss": 19.1586, "step": 67 }, { "epoch": 0.0012429853583636463, "grad_norm": 9.853789742977987, "learning_rate": 4.0822543792840826e-07, "loss": 18.8072, "step": 68 }, { "epoch": 0.0012612645548101705, "grad_norm": 8.424851011261469, "learning_rate": 4.1431835491241434e-07, "loss": 18.0205, "step": 69 }, { "epoch": 0.0012795437512566948, "grad_norm": 11.179083215450209, "learning_rate": 4.2041127189642047e-07, "loss": 19.1082, "step": 70 }, { "epoch": 0.001297822947703219, "grad_norm": 11.205875315547486, "learning_rate": 4.2650418888042655e-07, "loss": 19.1888, "step": 71 }, { "epoch": 0.0013161021441497431, "grad_norm": 9.218667827948646, "learning_rate": 4.325971058644327e-07, "loss": 18.5834, "step": 72 }, { "epoch": 0.0013343813405962675, "grad_norm": 9.114488209681602, "learning_rate": 4.386900228484387e-07, "loss": 18.3438, "step": 73 }, { "epoch": 0.0013526605370427916, "grad_norm": 9.147427856276416, "learning_rate": 4.4478293983244483e-07, "loss": 18.3276, "step": 74 }, { "epoch": 0.0013709397334893158, "grad_norm": 10.815359011016698, "learning_rate": 4.508758568164509e-07, "loss": 18.9537, "step": 75 }, { "epoch": 0.00138921892993584, "grad_norm": 11.106338765167994, "learning_rate": 4.5696877380045703e-07, "loss": 20.4775, "step": 76 }, { "epoch": 0.0014074981263823643, "grad_norm": 9.940729728721143, "learning_rate": 4.630616907844631e-07, "loss": 19.0393, "step": 77 }, { "epoch": 0.0014257773228288884, "grad_norm": 9.792218976670425, "learning_rate": 4.691546077684692e-07, "loss": 18.8914, "step": 78 }, { "epoch": 0.0014440565192754126, "grad_norm": 10.60799501326941, "learning_rate": 4.752475247524753e-07, "loss": 18.8633, "step": 79 }, { "epoch": 0.001462335715721937, "grad_norm": 9.136615230502608, "learning_rate": 4.813404417364814e-07, "loss": 18.6693, "step": 80 }, { "epoch": 0.001480614912168461, "grad_norm": 9.050880233447275, "learning_rate": 4.874333587204875e-07, "loss": 18.3404, "step": 81 }, { "epoch": 0.0014988941086149852, "grad_norm": 9.821799815962711, "learning_rate": 4.935262757044935e-07, "loss": 19.028, "step": 82 }, { "epoch": 0.0015171733050615094, "grad_norm": 9.00509710897049, "learning_rate": 4.996191926884997e-07, "loss": 18.6076, "step": 83 }, { "epoch": 0.0015354525015080338, "grad_norm": 8.4164016249053, "learning_rate": 5.057121096725057e-07, "loss": 18.1026, "step": 84 }, { "epoch": 0.001553731697954558, "grad_norm": 9.747465626841214, "learning_rate": 5.118050266565119e-07, "loss": 18.742, "step": 85 }, { "epoch": 0.001572010894401082, "grad_norm": 11.031842189183193, "learning_rate": 5.17897943640518e-07, "loss": 19.5137, "step": 86 }, { "epoch": 0.0015902900908476064, "grad_norm": 8.60703077645388, "learning_rate": 5.23990860624524e-07, "loss": 18.6961, "step": 87 }, { "epoch": 0.0016085692872941306, "grad_norm": 9.06509676047877, "learning_rate": 5.300837776085301e-07, "loss": 18.5877, "step": 88 }, { "epoch": 0.0016268484837406547, "grad_norm": 9.844496870043358, "learning_rate": 5.361766945925363e-07, "loss": 18.7855, "step": 89 }, { "epoch": 0.001645127680187179, "grad_norm": 9.277358662258633, "learning_rate": 5.422696115765424e-07, "loss": 18.7348, "step": 90 }, { "epoch": 0.0016634068766337032, "grad_norm": 9.252853720597152, "learning_rate": 5.483625285605483e-07, "loss": 18.6759, "step": 91 }, { "epoch": 0.0016816860730802274, "grad_norm": 8.84638620725077, "learning_rate": 5.544554455445545e-07, "loss": 18.4325, "step": 92 }, { "epoch": 0.0016999652695267515, "grad_norm": 10.139260624547632, "learning_rate": 5.605483625285606e-07, "loss": 19.0223, "step": 93 }, { "epoch": 0.0017182444659732759, "grad_norm": 10.300594130638922, "learning_rate": 5.666412795125667e-07, "loss": 18.9603, "step": 94 }, { "epoch": 0.0017365236624198, "grad_norm": 8.558786354910753, "learning_rate": 5.727341964965727e-07, "loss": 18.1716, "step": 95 }, { "epoch": 0.0017548028588663242, "grad_norm": 9.221680135788464, "learning_rate": 5.788271134805789e-07, "loss": 19.0363, "step": 96 }, { "epoch": 0.0017730820553128485, "grad_norm": 9.318146513867163, "learning_rate": 5.84920030464585e-07, "loss": 18.308, "step": 97 }, { "epoch": 0.0017913612517593727, "grad_norm": 8.89684007120383, "learning_rate": 5.910129474485911e-07, "loss": 18.3987, "step": 98 }, { "epoch": 0.0018096404482058968, "grad_norm": 8.770987219267509, "learning_rate": 5.971058644325972e-07, "loss": 18.2499, "step": 99 }, { "epoch": 0.0018279196446524212, "grad_norm": 8.715509627670407, "learning_rate": 6.031987814166032e-07, "loss": 18.3155, "step": 100 }, { "epoch": 0.0018461988410989453, "grad_norm": 9.157266713898336, "learning_rate": 6.092916984006093e-07, "loss": 18.7471, "step": 101 }, { "epoch": 0.0018644780375454695, "grad_norm": 8.141147516544505, "learning_rate": 6.153846153846155e-07, "loss": 17.8998, "step": 102 }, { "epoch": 0.0018827572339919936, "grad_norm": 9.099602296515032, "learning_rate": 6.214775323686216e-07, "loss": 18.5905, "step": 103 }, { "epoch": 0.001901036430438518, "grad_norm": 9.459641647038191, "learning_rate": 6.275704493526276e-07, "loss": 18.919, "step": 104 }, { "epoch": 0.0019193156268850421, "grad_norm": 8.533043987820777, "learning_rate": 6.336633663366337e-07, "loss": 18.2984, "step": 105 }, { "epoch": 0.0019375948233315663, "grad_norm": 8.831215555540918, "learning_rate": 6.397562833206399e-07, "loss": 18.4262, "step": 106 }, { "epoch": 0.0019558740197780904, "grad_norm": 9.760196162863846, "learning_rate": 6.458492003046459e-07, "loss": 18.7845, "step": 107 }, { "epoch": 0.001974153216224615, "grad_norm": 9.619501623532049, "learning_rate": 6.519421172886519e-07, "loss": 18.6491, "step": 108 }, { "epoch": 0.001992432412671139, "grad_norm": 9.120851474475579, "learning_rate": 6.580350342726581e-07, "loss": 18.7755, "step": 109 }, { "epoch": 0.002010711609117663, "grad_norm": 10.429387763449409, "learning_rate": 6.641279512566642e-07, "loss": 18.8206, "step": 110 }, { "epoch": 0.0020289908055641875, "grad_norm": 10.444785293681528, "learning_rate": 6.702208682406703e-07, "loss": 19.0005, "step": 111 }, { "epoch": 0.002047270002010712, "grad_norm": 10.653039310285068, "learning_rate": 6.763137852246764e-07, "loss": 19.1512, "step": 112 }, { "epoch": 0.0020655491984572357, "grad_norm": 8.025944215659788, "learning_rate": 6.824067022086825e-07, "loss": 18.0986, "step": 113 }, { "epoch": 0.00208382839490376, "grad_norm": 9.547795043033215, "learning_rate": 6.884996191926886e-07, "loss": 18.8097, "step": 114 }, { "epoch": 0.002102107591350284, "grad_norm": 9.19001868551479, "learning_rate": 6.945925361766946e-07, "loss": 18.7753, "step": 115 }, { "epoch": 0.0021203867877968084, "grad_norm": 9.39295941251693, "learning_rate": 7.006854531607008e-07, "loss": 18.4715, "step": 116 }, { "epoch": 0.0021386659842433328, "grad_norm": 7.970423062653836, "learning_rate": 7.067783701447068e-07, "loss": 18.0459, "step": 117 }, { "epoch": 0.0021569451806898567, "grad_norm": 9.510873550335704, "learning_rate": 7.128712871287129e-07, "loss": 18.8316, "step": 118 }, { "epoch": 0.002175224377136381, "grad_norm": 10.785468497817561, "learning_rate": 7.18964204112719e-07, "loss": 19.3941, "step": 119 }, { "epoch": 0.0021935035735829054, "grad_norm": 9.073789812838537, "learning_rate": 7.250571210967252e-07, "loss": 18.5127, "step": 120 }, { "epoch": 0.0022117827700294294, "grad_norm": 10.862325819460475, "learning_rate": 7.311500380807313e-07, "loss": 19.019, "step": 121 }, { "epoch": 0.0022300619664759537, "grad_norm": 7.8996003598539035, "learning_rate": 7.372429550647372e-07, "loss": 18.0205, "step": 122 }, { "epoch": 0.002248341162922478, "grad_norm": 8.825842860534118, "learning_rate": 7.433358720487434e-07, "loss": 18.3188, "step": 123 }, { "epoch": 0.002266620359369002, "grad_norm": 8.5897317430774, "learning_rate": 7.494287890327495e-07, "loss": 18.6975, "step": 124 }, { "epoch": 0.0022848995558155264, "grad_norm": 8.95155813372537, "learning_rate": 7.555217060167557e-07, "loss": 18.6758, "step": 125 }, { "epoch": 0.0023031787522620507, "grad_norm": 9.042809889012444, "learning_rate": 7.616146230007616e-07, "loss": 18.2421, "step": 126 }, { "epoch": 0.0023214579487085747, "grad_norm": 8.188339694755228, "learning_rate": 7.677075399847678e-07, "loss": 18.4488, "step": 127 }, { "epoch": 0.002339737145155099, "grad_norm": 9.252561792937517, "learning_rate": 7.738004569687739e-07, "loss": 18.3231, "step": 128 }, { "epoch": 0.0023580163416016234, "grad_norm": 9.19760399419201, "learning_rate": 7.798933739527799e-07, "loss": 18.5613, "step": 129 }, { "epoch": 0.0023762955380481473, "grad_norm": 9.755431876364451, "learning_rate": 7.85986290936786e-07, "loss": 18.842, "step": 130 }, { "epoch": 0.0023945747344946717, "grad_norm": 8.252542603191571, "learning_rate": 7.920792079207921e-07, "loss": 17.9852, "step": 131 }, { "epoch": 0.0024128539309411956, "grad_norm": 8.43311297243739, "learning_rate": 7.981721249047983e-07, "loss": 18.5464, "step": 132 }, { "epoch": 0.00243113312738772, "grad_norm": 9.599173769619222, "learning_rate": 8.042650418888043e-07, "loss": 18.9277, "step": 133 }, { "epoch": 0.0024494123238342443, "grad_norm": 9.308163019342764, "learning_rate": 8.103579588728105e-07, "loss": 18.9868, "step": 134 }, { "epoch": 0.0024676915202807683, "grad_norm": 8.775269468740735, "learning_rate": 8.164508758568165e-07, "loss": 18.3321, "step": 135 }, { "epoch": 0.0024859707167272926, "grad_norm": 9.4470228819566, "learning_rate": 8.225437928408227e-07, "loss": 18.7455, "step": 136 }, { "epoch": 0.002504249913173817, "grad_norm": 8.587540626226483, "learning_rate": 8.286367098248287e-07, "loss": 18.2026, "step": 137 }, { "epoch": 0.002522529109620341, "grad_norm": 9.605233117505259, "learning_rate": 8.347296268088348e-07, "loss": 18.452, "step": 138 }, { "epoch": 0.0025408083060668653, "grad_norm": 9.439766715402934, "learning_rate": 8.408225437928409e-07, "loss": 18.5597, "step": 139 }, { "epoch": 0.0025590875025133897, "grad_norm": 8.865103055248236, "learning_rate": 8.46915460776847e-07, "loss": 18.1067, "step": 140 }, { "epoch": 0.0025773666989599136, "grad_norm": 8.963452516669594, "learning_rate": 8.530083777608531e-07, "loss": 18.3106, "step": 141 }, { "epoch": 0.002595645895406438, "grad_norm": 9.31260967372063, "learning_rate": 8.591012947448592e-07, "loss": 18.1281, "step": 142 }, { "epoch": 0.0026139250918529623, "grad_norm": 9.983750461174719, "learning_rate": 8.651942117288653e-07, "loss": 18.9052, "step": 143 }, { "epoch": 0.0026322042882994862, "grad_norm": 8.525711362497992, "learning_rate": 8.712871287128713e-07, "loss": 17.9311, "step": 144 }, { "epoch": 0.0026504834847460106, "grad_norm": 8.16279592260429, "learning_rate": 8.773800456968774e-07, "loss": 18.0707, "step": 145 }, { "epoch": 0.002668762681192535, "grad_norm": 10.810956985437747, "learning_rate": 8.834729626808836e-07, "loss": 19.6098, "step": 146 }, { "epoch": 0.002687041877639059, "grad_norm": 8.418666058583149, "learning_rate": 8.895658796648897e-07, "loss": 18.3767, "step": 147 }, { "epoch": 0.0027053210740855833, "grad_norm": 7.566753590641935, "learning_rate": 8.956587966488957e-07, "loss": 17.986, "step": 148 }, { "epoch": 0.002723600270532107, "grad_norm": 8.820077881486961, "learning_rate": 9.017517136329018e-07, "loss": 17.9399, "step": 149 }, { "epoch": 0.0027418794669786316, "grad_norm": 7.835441517740366, "learning_rate": 9.07844630616908e-07, "loss": 18.1436, "step": 150 }, { "epoch": 0.002760158663425156, "grad_norm": 10.311120459482858, "learning_rate": 9.139375476009141e-07, "loss": 19.287, "step": 151 }, { "epoch": 0.00277843785987168, "grad_norm": 9.180260647695155, "learning_rate": 9.2003046458492e-07, "loss": 18.2681, "step": 152 }, { "epoch": 0.0027967170563182042, "grad_norm": 9.223803948298361, "learning_rate": 9.261233815689262e-07, "loss": 18.5626, "step": 153 }, { "epoch": 0.0028149962527647286, "grad_norm": 9.743220538497313, "learning_rate": 9.322162985529323e-07, "loss": 18.5406, "step": 154 }, { "epoch": 0.0028332754492112525, "grad_norm": 8.421411343466511, "learning_rate": 9.383092155369384e-07, "loss": 18.3646, "step": 155 }, { "epoch": 0.002851554645657777, "grad_norm": 8.990896416587637, "learning_rate": 9.444021325209444e-07, "loss": 18.4719, "step": 156 }, { "epoch": 0.0028698338421043012, "grad_norm": 8.754872875971705, "learning_rate": 9.504950495049506e-07, "loss": 18.2906, "step": 157 }, { "epoch": 0.002888113038550825, "grad_norm": 9.710686470408715, "learning_rate": 9.565879664889568e-07, "loss": 18.6634, "step": 158 }, { "epoch": 0.0029063922349973495, "grad_norm": 8.724629853904714, "learning_rate": 9.626808834729628e-07, "loss": 18.9012, "step": 159 }, { "epoch": 0.002924671431443874, "grad_norm": 9.188575498830037, "learning_rate": 9.687738004569687e-07, "loss": 18.2614, "step": 160 }, { "epoch": 0.002942950627890398, "grad_norm": 11.013102190121193, "learning_rate": 9.74866717440975e-07, "loss": 19.1223, "step": 161 }, { "epoch": 0.002961229824336922, "grad_norm": 8.394700341610353, "learning_rate": 9.809596344249811e-07, "loss": 18.1358, "step": 162 }, { "epoch": 0.0029795090207834466, "grad_norm": 8.069990243816974, "learning_rate": 9.87052551408987e-07, "loss": 17.8555, "step": 163 }, { "epoch": 0.0029977882172299705, "grad_norm": 9.303198108361302, "learning_rate": 9.931454683929933e-07, "loss": 18.468, "step": 164 }, { "epoch": 0.003016067413676495, "grad_norm": 8.335228410252673, "learning_rate": 9.992383853769994e-07, "loss": 18.0916, "step": 165 }, { "epoch": 0.0030343466101230188, "grad_norm": 8.432740981655469, "learning_rate": 1.0053313023610054e-06, "loss": 18.0996, "step": 166 }, { "epoch": 0.003052625806569543, "grad_norm": 9.519777017936155, "learning_rate": 1.0114242193450114e-06, "loss": 19.1096, "step": 167 }, { "epoch": 0.0030709050030160675, "grad_norm": 8.065844106323338, "learning_rate": 1.0175171363290176e-06, "loss": 18.1032, "step": 168 }, { "epoch": 0.0030891841994625914, "grad_norm": 9.163532440473222, "learning_rate": 1.0236100533130238e-06, "loss": 18.282, "step": 169 }, { "epoch": 0.003107463395909116, "grad_norm": 9.388304585526452, "learning_rate": 1.0297029702970297e-06, "loss": 18.6834, "step": 170 }, { "epoch": 0.00312574259235564, "grad_norm": 8.87715952646457, "learning_rate": 1.035795887281036e-06, "loss": 18.477, "step": 171 }, { "epoch": 0.003144021788802164, "grad_norm": 26.114301441663105, "learning_rate": 1.041888804265042e-06, "loss": 19.293, "step": 172 }, { "epoch": 0.0031623009852486885, "grad_norm": 9.694088824254733, "learning_rate": 1.047981721249048e-06, "loss": 18.7479, "step": 173 }, { "epoch": 0.003180580181695213, "grad_norm": 9.095561608266575, "learning_rate": 1.054074638233054e-06, "loss": 18.3212, "step": 174 }, { "epoch": 0.0031988593781417368, "grad_norm": 9.421354731090213, "learning_rate": 1.0601675552170602e-06, "loss": 18.6331, "step": 175 }, { "epoch": 0.003217138574588261, "grad_norm": 10.830755674328625, "learning_rate": 1.0662604722010664e-06, "loss": 19.3953, "step": 176 }, { "epoch": 0.0032354177710347855, "grad_norm": 8.334796100622082, "learning_rate": 1.0723533891850726e-06, "loss": 18.0036, "step": 177 }, { "epoch": 0.0032536969674813094, "grad_norm": 8.274933800538554, "learning_rate": 1.0784463061690785e-06, "loss": 18.0767, "step": 178 }, { "epoch": 0.0032719761639278338, "grad_norm": 8.952990668764464, "learning_rate": 1.0845392231530847e-06, "loss": 18.3319, "step": 179 }, { "epoch": 0.003290255360374358, "grad_norm": 7.448210693608828, "learning_rate": 1.0906321401370907e-06, "loss": 17.8476, "step": 180 }, { "epoch": 0.003308534556820882, "grad_norm": 11.033797657019463, "learning_rate": 1.0967250571210967e-06, "loss": 19.3991, "step": 181 }, { "epoch": 0.0033268137532674064, "grad_norm": 10.099842515451865, "learning_rate": 1.1028179741051028e-06, "loss": 19.1332, "step": 182 }, { "epoch": 0.0033450929497139304, "grad_norm": 9.208122392507601, "learning_rate": 1.108910891089109e-06, "loss": 18.6594, "step": 183 }, { "epoch": 0.0033633721461604547, "grad_norm": 9.323685684955574, "learning_rate": 1.1150038080731152e-06, "loss": 19.0113, "step": 184 }, { "epoch": 0.003381651342606979, "grad_norm": 9.48121086474806, "learning_rate": 1.1210967250571212e-06, "loss": 18.2684, "step": 185 }, { "epoch": 0.003399930539053503, "grad_norm": 9.458692967694427, "learning_rate": 1.1271896420411274e-06, "loss": 18.5252, "step": 186 }, { "epoch": 0.0034182097355000274, "grad_norm": 10.162976696658363, "learning_rate": 1.1332825590251333e-06, "loss": 18.7188, "step": 187 }, { "epoch": 0.0034364889319465517, "grad_norm": 7.536693138762417, "learning_rate": 1.1393754760091395e-06, "loss": 17.7561, "step": 188 }, { "epoch": 0.0034547681283930757, "grad_norm": 10.261621116891442, "learning_rate": 1.1454683929931455e-06, "loss": 19.0036, "step": 189 }, { "epoch": 0.0034730473248396, "grad_norm": 9.211901187355524, "learning_rate": 1.1515613099771517e-06, "loss": 18.6356, "step": 190 }, { "epoch": 0.0034913265212861244, "grad_norm": 9.798349541549694, "learning_rate": 1.1576542269611578e-06, "loss": 19.301, "step": 191 }, { "epoch": 0.0035096057177326483, "grad_norm": 9.408807780556515, "learning_rate": 1.1637471439451638e-06, "loss": 18.2518, "step": 192 }, { "epoch": 0.0035278849141791727, "grad_norm": 8.703383551292617, "learning_rate": 1.16984006092917e-06, "loss": 18.2204, "step": 193 }, { "epoch": 0.003546164110625697, "grad_norm": 9.964051574549913, "learning_rate": 1.175932977913176e-06, "loss": 18.758, "step": 194 }, { "epoch": 0.003564443307072221, "grad_norm": 8.45774454932824, "learning_rate": 1.1820258948971822e-06, "loss": 18.0124, "step": 195 }, { "epoch": 0.0035827225035187454, "grad_norm": 9.41053515094707, "learning_rate": 1.1881188118811881e-06, "loss": 18.6049, "step": 196 }, { "epoch": 0.0036010016999652697, "grad_norm": 7.926905776853164, "learning_rate": 1.1942117288651943e-06, "loss": 17.9763, "step": 197 }, { "epoch": 0.0036192808964117936, "grad_norm": 7.762301016854572, "learning_rate": 1.2003046458492005e-06, "loss": 18.0592, "step": 198 }, { "epoch": 0.003637560092858318, "grad_norm": 8.298271358933013, "learning_rate": 1.2063975628332065e-06, "loss": 18.0102, "step": 199 }, { "epoch": 0.0036558392893048424, "grad_norm": 10.730216617638389, "learning_rate": 1.2124904798172126e-06, "loss": 19.0529, "step": 200 }, { "epoch": 0.0036741184857513663, "grad_norm": 8.315309216617923, "learning_rate": 1.2185833968012186e-06, "loss": 18.2883, "step": 201 }, { "epoch": 0.0036923976821978907, "grad_norm": 9.937267774764383, "learning_rate": 1.2246763137852248e-06, "loss": 18.8833, "step": 202 }, { "epoch": 0.0037106768786444146, "grad_norm": 8.298280967026155, "learning_rate": 1.230769230769231e-06, "loss": 18.4287, "step": 203 }, { "epoch": 0.003728956075090939, "grad_norm": 8.872669745889823, "learning_rate": 1.236862147753237e-06, "loss": 18.3237, "step": 204 }, { "epoch": 0.0037472352715374633, "grad_norm": 9.26640734442622, "learning_rate": 1.2429550647372431e-06, "loss": 18.5356, "step": 205 }, { "epoch": 0.0037655144679839873, "grad_norm": 9.193263429338693, "learning_rate": 1.249047981721249e-06, "loss": 18.1574, "step": 206 }, { "epoch": 0.0037837936644305116, "grad_norm": 9.406715721001522, "learning_rate": 1.2551408987052553e-06, "loss": 18.5967, "step": 207 }, { "epoch": 0.003802072860877036, "grad_norm": 8.430542392894331, "learning_rate": 1.2612338156892615e-06, "loss": 18.4397, "step": 208 }, { "epoch": 0.00382035205732356, "grad_norm": 8.324568367394546, "learning_rate": 1.2673267326732674e-06, "loss": 18.1317, "step": 209 }, { "epoch": 0.0038386312537700843, "grad_norm": 8.636707220640757, "learning_rate": 1.2734196496572734e-06, "loss": 18.2539, "step": 210 }, { "epoch": 0.0038569104502166086, "grad_norm": 8.559056941183735, "learning_rate": 1.2795125666412798e-06, "loss": 18.1112, "step": 211 }, { "epoch": 0.0038751896466631326, "grad_norm": 8.349106399691129, "learning_rate": 1.2856054836252858e-06, "loss": 18.0289, "step": 212 }, { "epoch": 0.003893468843109657, "grad_norm": 9.109517130979178, "learning_rate": 1.2916984006092917e-06, "loss": 18.4914, "step": 213 }, { "epoch": 0.003911748039556181, "grad_norm": 10.17773107995757, "learning_rate": 1.297791317593298e-06, "loss": 18.686, "step": 214 }, { "epoch": 0.003930027236002705, "grad_norm": 8.880061859485329, "learning_rate": 1.3038842345773039e-06, "loss": 18.1707, "step": 215 }, { "epoch": 0.00394830643244923, "grad_norm": 10.41268299096758, "learning_rate": 1.3099771515613103e-06, "loss": 19.4722, "step": 216 }, { "epoch": 0.003966585628895754, "grad_norm": 9.19944078898899, "learning_rate": 1.3160700685453163e-06, "loss": 18.5689, "step": 217 }, { "epoch": 0.003984864825342278, "grad_norm": 8.52480330975333, "learning_rate": 1.3221629855293222e-06, "loss": 18.3901, "step": 218 }, { "epoch": 0.004003144021788802, "grad_norm": 9.08474388291791, "learning_rate": 1.3282559025133284e-06, "loss": 18.4845, "step": 219 }, { "epoch": 0.004021423218235326, "grad_norm": 9.937722849785393, "learning_rate": 1.3343488194973344e-06, "loss": 18.5381, "step": 220 }, { "epoch": 0.0040397024146818505, "grad_norm": 9.6690554776204, "learning_rate": 1.3404417364813406e-06, "loss": 18.1501, "step": 221 }, { "epoch": 0.004057981611128375, "grad_norm": 8.28729947943615, "learning_rate": 1.3465346534653467e-06, "loss": 18.0598, "step": 222 }, { "epoch": 0.004076260807574899, "grad_norm": 9.286936139854614, "learning_rate": 1.3526275704493527e-06, "loss": 18.5233, "step": 223 }, { "epoch": 0.004094540004021424, "grad_norm": 10.265707040677311, "learning_rate": 1.3587204874333587e-06, "loss": 18.3456, "step": 224 }, { "epoch": 0.004112819200467947, "grad_norm": 9.052625229520075, "learning_rate": 1.364813404417365e-06, "loss": 18.1519, "step": 225 }, { "epoch": 0.0041310983969144715, "grad_norm": 9.00678834172409, "learning_rate": 1.370906321401371e-06, "loss": 18.598, "step": 226 }, { "epoch": 0.004149377593360996, "grad_norm": 9.73419082794281, "learning_rate": 1.3769992383853772e-06, "loss": 18.5196, "step": 227 }, { "epoch": 0.00416765678980752, "grad_norm": 9.43062878041649, "learning_rate": 1.3830921553693832e-06, "loss": 18.437, "step": 228 }, { "epoch": 0.004185935986254045, "grad_norm": 8.860809691215216, "learning_rate": 1.3891850723533892e-06, "loss": 18.2498, "step": 229 }, { "epoch": 0.004204215182700568, "grad_norm": 8.461344696554631, "learning_rate": 1.3952779893373956e-06, "loss": 18.3432, "step": 230 }, { "epoch": 0.0042224943791470924, "grad_norm": 8.923589818596337, "learning_rate": 1.4013709063214015e-06, "loss": 18.1395, "step": 231 }, { "epoch": 0.004240773575593617, "grad_norm": 8.70195064898488, "learning_rate": 1.4074638233054075e-06, "loss": 18.0284, "step": 232 }, { "epoch": 0.004259052772040141, "grad_norm": 9.260996824567075, "learning_rate": 1.4135567402894137e-06, "loss": 18.4668, "step": 233 }, { "epoch": 0.0042773319684866655, "grad_norm": 9.055537513006174, "learning_rate": 1.4196496572734197e-06, "loss": 18.1067, "step": 234 }, { "epoch": 0.00429561116493319, "grad_norm": 8.58620539390862, "learning_rate": 1.4257425742574258e-06, "loss": 17.9723, "step": 235 }, { "epoch": 0.004313890361379713, "grad_norm": 9.219953464408006, "learning_rate": 1.431835491241432e-06, "loss": 18.6434, "step": 236 }, { "epoch": 0.004332169557826238, "grad_norm": 9.00950759799232, "learning_rate": 1.437928408225438e-06, "loss": 18.3838, "step": 237 }, { "epoch": 0.004350448754272762, "grad_norm": 8.951165357685648, "learning_rate": 1.4440213252094442e-06, "loss": 18.0981, "step": 238 }, { "epoch": 0.0043687279507192865, "grad_norm": 10.090790708095982, "learning_rate": 1.4501142421934503e-06, "loss": 18.4739, "step": 239 }, { "epoch": 0.004387007147165811, "grad_norm": 8.43510075285029, "learning_rate": 1.4562071591774563e-06, "loss": 18.1353, "step": 240 }, { "epoch": 0.004405286343612335, "grad_norm": 8.575581867632986, "learning_rate": 1.4623000761614625e-06, "loss": 18.0785, "step": 241 }, { "epoch": 0.004423565540058859, "grad_norm": 7.335566930392319, "learning_rate": 1.4683929931454685e-06, "loss": 17.9746, "step": 242 }, { "epoch": 0.004441844736505383, "grad_norm": 8.967314116339633, "learning_rate": 1.4744859101294744e-06, "loss": 18.5792, "step": 243 }, { "epoch": 0.0044601239329519074, "grad_norm": 9.172005768183263, "learning_rate": 1.4805788271134808e-06, "loss": 18.1535, "step": 244 }, { "epoch": 0.004478403129398432, "grad_norm": 11.596045287093936, "learning_rate": 1.4866717440974868e-06, "loss": 19.2335, "step": 245 }, { "epoch": 0.004496682325844956, "grad_norm": 9.252752460154074, "learning_rate": 1.4927646610814928e-06, "loss": 18.8617, "step": 246 }, { "epoch": 0.00451496152229148, "grad_norm": 8.579697314813979, "learning_rate": 1.498857578065499e-06, "loss": 18.2312, "step": 247 }, { "epoch": 0.004533240718738004, "grad_norm": 7.887835186000388, "learning_rate": 1.504950495049505e-06, "loss": 17.8583, "step": 248 }, { "epoch": 0.004551519915184528, "grad_norm": 9.296133202137408, "learning_rate": 1.5110434120335113e-06, "loss": 18.5319, "step": 249 }, { "epoch": 0.004569799111631053, "grad_norm": 9.58481341796332, "learning_rate": 1.5171363290175173e-06, "loss": 19.2434, "step": 250 }, { "epoch": 0.004588078308077577, "grad_norm": 7.905919594260702, "learning_rate": 1.5232292460015233e-06, "loss": 18.1045, "step": 251 }, { "epoch": 0.0046063575045241015, "grad_norm": 8.59126376514489, "learning_rate": 1.5293221629855294e-06, "loss": 18.3831, "step": 252 }, { "epoch": 0.004624636700970625, "grad_norm": 8.375949981654848, "learning_rate": 1.5354150799695356e-06, "loss": 18.3196, "step": 253 }, { "epoch": 0.004642915897417149, "grad_norm": 8.653570175094352, "learning_rate": 1.5415079969535416e-06, "loss": 18.6534, "step": 254 }, { "epoch": 0.004661195093863674, "grad_norm": 8.793798043219379, "learning_rate": 1.5476009139375478e-06, "loss": 18.3333, "step": 255 }, { "epoch": 0.004679474290310198, "grad_norm": 9.311086736443427, "learning_rate": 1.5536938309215537e-06, "loss": 18.7836, "step": 256 }, { "epoch": 0.004697753486756722, "grad_norm": 9.503701462288985, "learning_rate": 1.5597867479055597e-06, "loss": 18.6113, "step": 257 }, { "epoch": 0.004716032683203247, "grad_norm": 9.64408701783925, "learning_rate": 1.5658796648895661e-06, "loss": 18.3561, "step": 258 }, { "epoch": 0.00473431187964977, "grad_norm": 7.412750251028354, "learning_rate": 1.571972581873572e-06, "loss": 17.6636, "step": 259 }, { "epoch": 0.004752591076096295, "grad_norm": 8.565521370367357, "learning_rate": 1.5780654988575783e-06, "loss": 18.2246, "step": 260 }, { "epoch": 0.004770870272542819, "grad_norm": 9.021155353544811, "learning_rate": 1.5841584158415842e-06, "loss": 18.8278, "step": 261 }, { "epoch": 0.004789149468989343, "grad_norm": 8.911016454755051, "learning_rate": 1.5902513328255902e-06, "loss": 17.9716, "step": 262 }, { "epoch": 0.004807428665435868, "grad_norm": 9.753553688000444, "learning_rate": 1.5963442498095966e-06, "loss": 18.6498, "step": 263 }, { "epoch": 0.004825707861882391, "grad_norm": 9.410473633134915, "learning_rate": 1.6024371667936026e-06, "loss": 18.7673, "step": 264 }, { "epoch": 0.004843987058328916, "grad_norm": 8.841939654519999, "learning_rate": 1.6085300837776085e-06, "loss": 18.7812, "step": 265 }, { "epoch": 0.00486226625477544, "grad_norm": 10.138007808406874, "learning_rate": 1.6146230007616147e-06, "loss": 18.2232, "step": 266 }, { "epoch": 0.004880545451221964, "grad_norm": 7.882651548223116, "learning_rate": 1.620715917745621e-06, "loss": 18.0703, "step": 267 }, { "epoch": 0.004898824647668489, "grad_norm": 7.30363490583701, "learning_rate": 1.6268088347296269e-06, "loss": 17.7259, "step": 268 }, { "epoch": 0.004917103844115013, "grad_norm": 7.617273837055213, "learning_rate": 1.632901751713633e-06, "loss": 18.2572, "step": 269 }, { "epoch": 0.0049353830405615366, "grad_norm": 9.61420946928789, "learning_rate": 1.638994668697639e-06, "loss": 18.7611, "step": 270 }, { "epoch": 0.004953662237008061, "grad_norm": 9.261819370122486, "learning_rate": 1.6450875856816454e-06, "loss": 18.2477, "step": 271 }, { "epoch": 0.004971941433454585, "grad_norm": 8.86145477554711, "learning_rate": 1.6511805026656514e-06, "loss": 18.3001, "step": 272 }, { "epoch": 0.00499022062990111, "grad_norm": 10.384890389543454, "learning_rate": 1.6572734196496574e-06, "loss": 19.3691, "step": 273 }, { "epoch": 0.005008499826347634, "grad_norm": 8.430726901053445, "learning_rate": 1.6633663366336635e-06, "loss": 18.0541, "step": 274 }, { "epoch": 0.005026779022794158, "grad_norm": 8.983040809310916, "learning_rate": 1.6694592536176695e-06, "loss": 18.1607, "step": 275 }, { "epoch": 0.005045058219240682, "grad_norm": 8.829373292406805, "learning_rate": 1.6755521706016755e-06, "loss": 19.0493, "step": 276 }, { "epoch": 0.005063337415687206, "grad_norm": 8.995825308388472, "learning_rate": 1.6816450875856819e-06, "loss": 18.2772, "step": 277 }, { "epoch": 0.005081616612133731, "grad_norm": 9.21103924369425, "learning_rate": 1.6877380045696878e-06, "loss": 18.4878, "step": 278 }, { "epoch": 0.005099895808580255, "grad_norm": 8.451162184646366, "learning_rate": 1.693830921553694e-06, "loss": 18.3464, "step": 279 }, { "epoch": 0.005118175005026779, "grad_norm": 11.161474200142381, "learning_rate": 1.6999238385377e-06, "loss": 18.4198, "step": 280 }, { "epoch": 0.005136454201473303, "grad_norm": 9.183530687167856, "learning_rate": 1.7060167555217062e-06, "loss": 18.1488, "step": 281 }, { "epoch": 0.005154733397919827, "grad_norm": 8.647176482887394, "learning_rate": 1.7121096725057124e-06, "loss": 18.1579, "step": 282 }, { "epoch": 0.0051730125943663515, "grad_norm": 8.517489610499862, "learning_rate": 1.7182025894897183e-06, "loss": 18.3083, "step": 283 }, { "epoch": 0.005191291790812876, "grad_norm": 8.675364535322364, "learning_rate": 1.7242955064737243e-06, "loss": 18.0499, "step": 284 }, { "epoch": 0.0052095709872594, "grad_norm": 7.709633922459531, "learning_rate": 1.7303884234577307e-06, "loss": 17.971, "step": 285 }, { "epoch": 0.005227850183705925, "grad_norm": 7.786420777908942, "learning_rate": 1.7364813404417367e-06, "loss": 17.9058, "step": 286 }, { "epoch": 0.005246129380152448, "grad_norm": 9.372489454130031, "learning_rate": 1.7425742574257426e-06, "loss": 18.5079, "step": 287 }, { "epoch": 0.0052644085765989725, "grad_norm": 8.644590798969862, "learning_rate": 1.7486671744097488e-06, "loss": 18.163, "step": 288 }, { "epoch": 0.005282687773045497, "grad_norm": 7.555436323653494, "learning_rate": 1.7547600913937548e-06, "loss": 17.7009, "step": 289 }, { "epoch": 0.005300966969492021, "grad_norm": 9.88718105471376, "learning_rate": 1.7608530083777612e-06, "loss": 18.5575, "step": 290 }, { "epoch": 0.005319246165938546, "grad_norm": 9.61361164826843, "learning_rate": 1.7669459253617672e-06, "loss": 18.4487, "step": 291 }, { "epoch": 0.00533752536238507, "grad_norm": 9.052047294027663, "learning_rate": 1.7730388423457731e-06, "loss": 18.5464, "step": 292 }, { "epoch": 0.0053558045588315934, "grad_norm": 8.356682986131199, "learning_rate": 1.7791317593297793e-06, "loss": 18.055, "step": 293 }, { "epoch": 0.005374083755278118, "grad_norm": 8.852621554464067, "learning_rate": 1.7852246763137853e-06, "loss": 18.463, "step": 294 }, { "epoch": 0.005392362951724642, "grad_norm": 8.885811095965344, "learning_rate": 1.7913175932977915e-06, "loss": 18.1471, "step": 295 }, { "epoch": 0.0054106421481711665, "grad_norm": 8.926621838046268, "learning_rate": 1.7974105102817976e-06, "loss": 18.4295, "step": 296 }, { "epoch": 0.005428921344617691, "grad_norm": 8.210608181416873, "learning_rate": 1.8035034272658036e-06, "loss": 18.4575, "step": 297 }, { "epoch": 0.005447200541064214, "grad_norm": 9.173397538987054, "learning_rate": 1.8095963442498096e-06, "loss": 18.2811, "step": 298 }, { "epoch": 0.005465479737510739, "grad_norm": 8.352517539856125, "learning_rate": 1.815689261233816e-06, "loss": 17.9896, "step": 299 }, { "epoch": 0.005483758933957263, "grad_norm": 9.58651656979235, "learning_rate": 1.821782178217822e-06, "loss": 18.5783, "step": 300 }, { "epoch": 0.0055020381304037875, "grad_norm": 9.415882626435163, "learning_rate": 1.8278750952018281e-06, "loss": 18.2743, "step": 301 }, { "epoch": 0.005520317326850312, "grad_norm": 9.350881387259488, "learning_rate": 1.833968012185834e-06, "loss": 18.5922, "step": 302 }, { "epoch": 0.005538596523296836, "grad_norm": 9.428154148379228, "learning_rate": 1.84006092916984e-06, "loss": 18.6393, "step": 303 }, { "epoch": 0.00555687571974336, "grad_norm": 8.481958715323854, "learning_rate": 1.8461538461538465e-06, "loss": 18.031, "step": 304 }, { "epoch": 0.005575154916189884, "grad_norm": 8.183490386485298, "learning_rate": 1.8522467631378524e-06, "loss": 18.0689, "step": 305 }, { "epoch": 0.0055934341126364084, "grad_norm": 8.527268923870656, "learning_rate": 1.8583396801218584e-06, "loss": 18.4661, "step": 306 }, { "epoch": 0.005611713309082933, "grad_norm": 7.812079110964028, "learning_rate": 1.8644325971058646e-06, "loss": 17.8002, "step": 307 }, { "epoch": 0.005629992505529457, "grad_norm": 8.41008868031268, "learning_rate": 1.8705255140898706e-06, "loss": 18.105, "step": 308 }, { "epoch": 0.0056482717019759815, "grad_norm": 8.814591940500186, "learning_rate": 1.8766184310738767e-06, "loss": 18.508, "step": 309 }, { "epoch": 0.005666550898422505, "grad_norm": 9.81463607128983, "learning_rate": 1.882711348057883e-06, "loss": 18.5906, "step": 310 }, { "epoch": 0.005684830094869029, "grad_norm": 9.259484237090978, "learning_rate": 1.8888042650418889e-06, "loss": 18.2118, "step": 311 }, { "epoch": 0.005703109291315554, "grad_norm": 8.229928076135344, "learning_rate": 1.8948971820258953e-06, "loss": 17.8685, "step": 312 }, { "epoch": 0.005721388487762078, "grad_norm": 9.328445386502587, "learning_rate": 1.9009900990099013e-06, "loss": 18.5938, "step": 313 }, { "epoch": 0.0057396676842086025, "grad_norm": 10.30968060318531, "learning_rate": 1.9070830159939072e-06, "loss": 19.0348, "step": 314 }, { "epoch": 0.005757946880655126, "grad_norm": 9.588842801909218, "learning_rate": 1.9131759329779136e-06, "loss": 18.6145, "step": 315 }, { "epoch": 0.00577622607710165, "grad_norm": 9.160642251033686, "learning_rate": 1.9192688499619194e-06, "loss": 18.4347, "step": 316 }, { "epoch": 0.005794505273548175, "grad_norm": 9.510478117148674, "learning_rate": 1.9253617669459256e-06, "loss": 18.4841, "step": 317 }, { "epoch": 0.005812784469994699, "grad_norm": 7.720316738121152, "learning_rate": 1.9314546839299317e-06, "loss": 17.8745, "step": 318 }, { "epoch": 0.005831063666441223, "grad_norm": 8.380398366818962, "learning_rate": 1.9375476009139375e-06, "loss": 18.0486, "step": 319 }, { "epoch": 0.005849342862887748, "grad_norm": 9.883905650411798, "learning_rate": 1.9436405178979437e-06, "loss": 18.8674, "step": 320 }, { "epoch": 0.005867622059334271, "grad_norm": 8.708964463668561, "learning_rate": 1.94973343488195e-06, "loss": 18.4288, "step": 321 }, { "epoch": 0.005885901255780796, "grad_norm": 9.297137906751821, "learning_rate": 1.955826351865956e-06, "loss": 19.0322, "step": 322 }, { "epoch": 0.00590418045222732, "grad_norm": 8.618946541704723, "learning_rate": 1.9619192688499622e-06, "loss": 18.214, "step": 323 }, { "epoch": 0.005922459648673844, "grad_norm": 8.060235940220567, "learning_rate": 1.968012185833968e-06, "loss": 17.9303, "step": 324 }, { "epoch": 0.005940738845120369, "grad_norm": 7.665853382725324, "learning_rate": 1.974105102817974e-06, "loss": 17.8826, "step": 325 }, { "epoch": 0.005959018041566893, "grad_norm": 8.121066064855041, "learning_rate": 1.9801980198019803e-06, "loss": 18.1789, "step": 326 }, { "epoch": 0.005977297238013417, "grad_norm": 8.541733313908619, "learning_rate": 1.9862909367859865e-06, "loss": 18.6535, "step": 327 }, { "epoch": 0.005995576434459941, "grad_norm": 8.531476217309224, "learning_rate": 1.9923838537699923e-06, "loss": 18.0689, "step": 328 }, { "epoch": 0.006013855630906465, "grad_norm": 9.387532432564992, "learning_rate": 1.998476770753999e-06, "loss": 18.4685, "step": 329 }, { "epoch": 0.00603213482735299, "grad_norm": 8.560597959419821, "learning_rate": 2.0045696877380047e-06, "loss": 18.1564, "step": 330 }, { "epoch": 0.006050414023799514, "grad_norm": 8.543531262561194, "learning_rate": 2.010662604722011e-06, "loss": 18.0159, "step": 331 }, { "epoch": 0.0060686932202460376, "grad_norm": 9.089859495341674, "learning_rate": 2.016755521706017e-06, "loss": 18.4976, "step": 332 }, { "epoch": 0.006086972416692562, "grad_norm": 7.892896022706285, "learning_rate": 2.0228484386900228e-06, "loss": 18.1215, "step": 333 }, { "epoch": 0.006105251613139086, "grad_norm": 9.459962297194554, "learning_rate": 2.0289413556740294e-06, "loss": 18.4911, "step": 334 }, { "epoch": 0.006123530809585611, "grad_norm": 9.975390120935543, "learning_rate": 2.035034272658035e-06, "loss": 18.3436, "step": 335 }, { "epoch": 0.006141810006032135, "grad_norm": 9.206433781385257, "learning_rate": 2.0411271896420413e-06, "loss": 18.6202, "step": 336 }, { "epoch": 0.006160089202478659, "grad_norm": 9.225056037073902, "learning_rate": 2.0472201066260475e-06, "loss": 18.5441, "step": 337 }, { "epoch": 0.006178368398925183, "grad_norm": 8.79411752064298, "learning_rate": 2.0533130236100533e-06, "loss": 18.2518, "step": 338 }, { "epoch": 0.006196647595371707, "grad_norm": 7.682661005049543, "learning_rate": 2.0594059405940594e-06, "loss": 17.9822, "step": 339 }, { "epoch": 0.006214926791818232, "grad_norm": 8.617648520128705, "learning_rate": 2.0654988575780656e-06, "loss": 18.1993, "step": 340 }, { "epoch": 0.006233205988264756, "grad_norm": 9.284518303747348, "learning_rate": 2.071591774562072e-06, "loss": 18.3468, "step": 341 }, { "epoch": 0.00625148518471128, "grad_norm": 9.146487062937387, "learning_rate": 2.077684691546078e-06, "loss": 18.6132, "step": 342 }, { "epoch": 0.006269764381157805, "grad_norm": 8.55358864306958, "learning_rate": 2.083777608530084e-06, "loss": 18.2002, "step": 343 }, { "epoch": 0.006288043577604328, "grad_norm": 10.309465924549984, "learning_rate": 2.08987052551409e-06, "loss": 18.7386, "step": 344 }, { "epoch": 0.0063063227740508526, "grad_norm": 8.050315978102859, "learning_rate": 2.095963442498096e-06, "loss": 18.0105, "step": 345 }, { "epoch": 0.006324601970497377, "grad_norm": 9.290651889149846, "learning_rate": 2.1020563594821023e-06, "loss": 18.4974, "step": 346 }, { "epoch": 0.006342881166943901, "grad_norm": 8.017310403022595, "learning_rate": 2.108149276466108e-06, "loss": 18.0765, "step": 347 }, { "epoch": 0.006361160363390426, "grad_norm": 8.43604073342653, "learning_rate": 2.1142421934501147e-06, "loss": 18.2749, "step": 348 }, { "epoch": 0.006379439559836949, "grad_norm": 8.412960841835288, "learning_rate": 2.1203351104341204e-06, "loss": 18.1562, "step": 349 }, { "epoch": 0.0063977187562834735, "grad_norm": 8.57195602786084, "learning_rate": 2.1264280274181266e-06, "loss": 18.2799, "step": 350 }, { "epoch": 0.006415997952729998, "grad_norm": 7.794052012116019, "learning_rate": 2.1325209444021328e-06, "loss": 17.8709, "step": 351 }, { "epoch": 0.006434277149176522, "grad_norm": 9.982820168942267, "learning_rate": 2.1386138613861385e-06, "loss": 18.6899, "step": 352 }, { "epoch": 0.006452556345623047, "grad_norm": 10.415493642023899, "learning_rate": 2.144706778370145e-06, "loss": 18.9632, "step": 353 }, { "epoch": 0.006470835542069571, "grad_norm": 9.477248548554387, "learning_rate": 2.150799695354151e-06, "loss": 18.648, "step": 354 }, { "epoch": 0.0064891147385160945, "grad_norm": 9.470811841953198, "learning_rate": 2.156892612338157e-06, "loss": 18.5502, "step": 355 }, { "epoch": 0.006507393934962619, "grad_norm": 8.463485062752705, "learning_rate": 2.1629855293221633e-06, "loss": 18.2422, "step": 356 }, { "epoch": 0.006525673131409143, "grad_norm": 8.813364649737569, "learning_rate": 2.1690784463061694e-06, "loss": 18.5201, "step": 357 }, { "epoch": 0.0065439523278556675, "grad_norm": 9.680619522560455, "learning_rate": 2.175171363290175e-06, "loss": 18.7938, "step": 358 }, { "epoch": 0.006562231524302192, "grad_norm": 7.425556851346463, "learning_rate": 2.1812642802741814e-06, "loss": 17.9377, "step": 359 }, { "epoch": 0.006580510720748716, "grad_norm": 9.254648716313772, "learning_rate": 2.1873571972581876e-06, "loss": 19.1159, "step": 360 }, { "epoch": 0.00659878991719524, "grad_norm": 8.771527877599317, "learning_rate": 2.1934501142421933e-06, "loss": 18.3495, "step": 361 }, { "epoch": 0.006617069113641764, "grad_norm": 8.411301969877785, "learning_rate": 2.1995430312262e-06, "loss": 18.1057, "step": 362 }, { "epoch": 0.0066353483100882885, "grad_norm": 8.639194963524913, "learning_rate": 2.2056359482102057e-06, "loss": 18.071, "step": 363 }, { "epoch": 0.006653627506534813, "grad_norm": 8.385867594796746, "learning_rate": 2.211728865194212e-06, "loss": 18.3967, "step": 364 }, { "epoch": 0.006671906702981337, "grad_norm": 7.846216957233574, "learning_rate": 2.217821782178218e-06, "loss": 17.9985, "step": 365 }, { "epoch": 0.006690185899427861, "grad_norm": 9.3430447459956, "learning_rate": 2.223914699162224e-06, "loss": 18.239, "step": 366 }, { "epoch": 0.006708465095874385, "grad_norm": 8.99258309655721, "learning_rate": 2.2300076161462304e-06, "loss": 18.3144, "step": 367 }, { "epoch": 0.0067267442923209094, "grad_norm": 8.561083662711514, "learning_rate": 2.236100533130236e-06, "loss": 18.0658, "step": 368 }, { "epoch": 0.006745023488767434, "grad_norm": 8.278398615614147, "learning_rate": 2.2421934501142424e-06, "loss": 18.2668, "step": 369 }, { "epoch": 0.006763302685213958, "grad_norm": 9.086882339107945, "learning_rate": 2.2482863670982485e-06, "loss": 18.5646, "step": 370 }, { "epoch": 0.0067815818816604825, "grad_norm": 8.594250049493938, "learning_rate": 2.2543792840822547e-06, "loss": 18.2993, "step": 371 }, { "epoch": 0.006799861078107006, "grad_norm": 7.963099594471555, "learning_rate": 2.2604722010662605e-06, "loss": 17.8993, "step": 372 }, { "epoch": 0.00681814027455353, "grad_norm": 8.712076673097176, "learning_rate": 2.2665651180502667e-06, "loss": 18.2929, "step": 373 }, { "epoch": 0.006836419471000055, "grad_norm": 8.612392279685967, "learning_rate": 2.272658035034273e-06, "loss": 18.394, "step": 374 }, { "epoch": 0.006854698667446579, "grad_norm": 7.550688028009268, "learning_rate": 2.278750952018279e-06, "loss": 17.8218, "step": 375 }, { "epoch": 0.0068729778638931035, "grad_norm": 8.913047191507939, "learning_rate": 2.284843869002285e-06, "loss": 18.3896, "step": 376 }, { "epoch": 0.006891257060339628, "grad_norm": 8.22715602929957, "learning_rate": 2.290936785986291e-06, "loss": 17.933, "step": 377 }, { "epoch": 0.006909536256786151, "grad_norm": 7.2560412530408795, "learning_rate": 2.297029702970297e-06, "loss": 17.7516, "step": 378 }, { "epoch": 0.006927815453232676, "grad_norm": 8.175683719428385, "learning_rate": 2.3031226199543033e-06, "loss": 17.8862, "step": 379 }, { "epoch": 0.0069460946496792, "grad_norm": 9.035425554274873, "learning_rate": 2.309215536938309e-06, "loss": 18.2054, "step": 380 }, { "epoch": 0.0069643738461257244, "grad_norm": 7.955366223753293, "learning_rate": 2.3153084539223157e-06, "loss": 17.9941, "step": 381 }, { "epoch": 0.006982653042572249, "grad_norm": 8.114741230412864, "learning_rate": 2.3214013709063215e-06, "loss": 18.2079, "step": 382 }, { "epoch": 0.007000932239018773, "grad_norm": 9.849098947167999, "learning_rate": 2.3274942878903276e-06, "loss": 18.8764, "step": 383 }, { "epoch": 0.007019211435465297, "grad_norm": 8.816246921630963, "learning_rate": 2.333587204874334e-06, "loss": 18.23, "step": 384 }, { "epoch": 0.007037490631911821, "grad_norm": 8.478770203163386, "learning_rate": 2.33968012185834e-06, "loss": 18.0982, "step": 385 }, { "epoch": 0.007055769828358345, "grad_norm": 8.73342392337039, "learning_rate": 2.345773038842346e-06, "loss": 18.2808, "step": 386 }, { "epoch": 0.00707404902480487, "grad_norm": 8.551873904980154, "learning_rate": 2.351865955826352e-06, "loss": 18.4912, "step": 387 }, { "epoch": 0.007092328221251394, "grad_norm": 7.85190380133635, "learning_rate": 2.357958872810358e-06, "loss": 18.053, "step": 388 }, { "epoch": 0.007110607417697918, "grad_norm": 7.448370159944098, "learning_rate": 2.3640517897943643e-06, "loss": 17.6652, "step": 389 }, { "epoch": 0.007128886614144442, "grad_norm": 8.734311497128099, "learning_rate": 2.3701447067783705e-06, "loss": 18.2995, "step": 390 }, { "epoch": 0.007147165810590966, "grad_norm": 9.316955074087764, "learning_rate": 2.3762376237623762e-06, "loss": 19.1144, "step": 391 }, { "epoch": 0.007165445007037491, "grad_norm": 7.268656975869662, "learning_rate": 2.3823305407463824e-06, "loss": 17.8357, "step": 392 }, { "epoch": 0.007183724203484015, "grad_norm": 8.476905541872776, "learning_rate": 2.3884234577303886e-06, "loss": 17.9871, "step": 393 }, { "epoch": 0.007202003399930539, "grad_norm": 9.06955839869183, "learning_rate": 2.3945163747143944e-06, "loss": 18.0771, "step": 394 }, { "epoch": 0.007220282596377063, "grad_norm": 9.233554569688772, "learning_rate": 2.400609291698401e-06, "loss": 18.3878, "step": 395 }, { "epoch": 0.007238561792823587, "grad_norm": 9.262697112074246, "learning_rate": 2.4067022086824067e-06, "loss": 18.0157, "step": 396 }, { "epoch": 0.007256840989270112, "grad_norm": 8.806520720371429, "learning_rate": 2.412795125666413e-06, "loss": 18.2733, "step": 397 }, { "epoch": 0.007275120185716636, "grad_norm": 9.965781203723397, "learning_rate": 2.418888042650419e-06, "loss": 18.6585, "step": 398 }, { "epoch": 0.00729339938216316, "grad_norm": 7.6022212125416475, "learning_rate": 2.4249809596344253e-06, "loss": 17.5579, "step": 399 }, { "epoch": 0.007311678578609685, "grad_norm": 8.688091981163637, "learning_rate": 2.4310738766184315e-06, "loss": 18.3117, "step": 400 }, { "epoch": 0.007329957775056208, "grad_norm": 10.294080040154196, "learning_rate": 2.4371667936024372e-06, "loss": 18.8181, "step": 401 }, { "epoch": 0.007348236971502733, "grad_norm": 8.829931437470595, "learning_rate": 2.4432597105864434e-06, "loss": 18.1261, "step": 402 }, { "epoch": 0.007366516167949257, "grad_norm": 8.191147431313302, "learning_rate": 2.4493526275704496e-06, "loss": 18.0081, "step": 403 }, { "epoch": 0.007384795364395781, "grad_norm": 7.470300626118907, "learning_rate": 2.4554455445544558e-06, "loss": 17.5224, "step": 404 }, { "epoch": 0.007403074560842306, "grad_norm": 9.007826178671621, "learning_rate": 2.461538461538462e-06, "loss": 18.4673, "step": 405 }, { "epoch": 0.007421353757288829, "grad_norm": 7.5259895720074335, "learning_rate": 2.4676313785224677e-06, "loss": 18.0889, "step": 406 }, { "epoch": 0.0074396329537353536, "grad_norm": 8.50269065120894, "learning_rate": 2.473724295506474e-06, "loss": 18.4111, "step": 407 }, { "epoch": 0.007457912150181878, "grad_norm": 10.621098192229644, "learning_rate": 2.47981721249048e-06, "loss": 18.6, "step": 408 }, { "epoch": 0.007476191346628402, "grad_norm": 7.89152843253615, "learning_rate": 2.4859101294744863e-06, "loss": 17.7234, "step": 409 }, { "epoch": 0.007494470543074927, "grad_norm": 7.698598794778496, "learning_rate": 2.492003046458492e-06, "loss": 17.9908, "step": 410 }, { "epoch": 0.007512749739521451, "grad_norm": 8.585157258990362, "learning_rate": 2.498095963442498e-06, "loss": 18.3271, "step": 411 }, { "epoch": 0.0075310289359679745, "grad_norm": 8.91463773765338, "learning_rate": 2.504188880426505e-06, "loss": 18.1335, "step": 412 }, { "epoch": 0.007549308132414499, "grad_norm": 7.890245109336917, "learning_rate": 2.5102817974105106e-06, "loss": 17.9491, "step": 413 }, { "epoch": 0.007567587328861023, "grad_norm": 9.457275052109484, "learning_rate": 2.5163747143945167e-06, "loss": 18.5356, "step": 414 }, { "epoch": 0.007585866525307548, "grad_norm": 8.431178749153313, "learning_rate": 2.522467631378523e-06, "loss": 18.3072, "step": 415 }, { "epoch": 0.007604145721754072, "grad_norm": 7.7381910924238175, "learning_rate": 2.5285605483625287e-06, "loss": 17.778, "step": 416 }, { "epoch": 0.007622424918200596, "grad_norm": 8.475808326620589, "learning_rate": 2.534653465346535e-06, "loss": 17.9745, "step": 417 }, { "epoch": 0.00764070411464712, "grad_norm": 11.878356849600886, "learning_rate": 2.540746382330541e-06, "loss": 19.1678, "step": 418 }, { "epoch": 0.007658983311093644, "grad_norm": 8.731099408033131, "learning_rate": 2.546839299314547e-06, "loss": 18.3064, "step": 419 }, { "epoch": 0.0076772625075401685, "grad_norm": 9.373694875302448, "learning_rate": 2.552932216298553e-06, "loss": 18.4102, "step": 420 }, { "epoch": 0.007695541703986693, "grad_norm": 9.479865504833226, "learning_rate": 2.5590251332825596e-06, "loss": 18.679, "step": 421 }, { "epoch": 0.007713820900433217, "grad_norm": 9.656544881683654, "learning_rate": 2.565118050266565e-06, "loss": 18.8041, "step": 422 }, { "epoch": 0.007732100096879741, "grad_norm": 8.426811539874398, "learning_rate": 2.5712109672505715e-06, "loss": 18.1999, "step": 423 }, { "epoch": 0.007750379293326265, "grad_norm": 7.631132413151855, "learning_rate": 2.5773038842345777e-06, "loss": 17.8451, "step": 424 }, { "epoch": 0.0077686584897727895, "grad_norm": 9.967810100187242, "learning_rate": 2.5833968012185835e-06, "loss": 18.3001, "step": 425 }, { "epoch": 0.007786937686219314, "grad_norm": 10.34311365776934, "learning_rate": 2.5894897182025897e-06, "loss": 18.9942, "step": 426 }, { "epoch": 0.007805216882665838, "grad_norm": 8.436342406167185, "learning_rate": 2.595582635186596e-06, "loss": 18.5091, "step": 427 }, { "epoch": 0.007823496079112362, "grad_norm": 8.084051116156678, "learning_rate": 2.6016755521706016e-06, "loss": 18.2411, "step": 428 }, { "epoch": 0.007841775275558887, "grad_norm": 9.286977568279523, "learning_rate": 2.6077684691546078e-06, "loss": 18.3457, "step": 429 }, { "epoch": 0.00786005447200541, "grad_norm": 7.550128803367409, "learning_rate": 2.6138613861386144e-06, "loss": 17.8468, "step": 430 }, { "epoch": 0.007878333668451936, "grad_norm": 9.730459473486697, "learning_rate": 2.6199543031226206e-06, "loss": 18.5781, "step": 431 }, { "epoch": 0.00789661286489846, "grad_norm": 9.466978400057728, "learning_rate": 2.6260472201066263e-06, "loss": 18.767, "step": 432 }, { "epoch": 0.007914892061344983, "grad_norm": 8.812812776275303, "learning_rate": 2.6321401370906325e-06, "loss": 18.279, "step": 433 }, { "epoch": 0.007933171257791508, "grad_norm": 9.176005654446904, "learning_rate": 2.6382330540746387e-06, "loss": 18.8576, "step": 434 }, { "epoch": 0.007951450454238031, "grad_norm": 8.825127776543228, "learning_rate": 2.6443259710586444e-06, "loss": 18.334, "step": 435 }, { "epoch": 0.007969729650684557, "grad_norm": 8.78079113285075, "learning_rate": 2.6504188880426506e-06, "loss": 17.8408, "step": 436 }, { "epoch": 0.00798800884713108, "grad_norm": 8.891443387231634, "learning_rate": 2.656511805026657e-06, "loss": 18.2782, "step": 437 }, { "epoch": 0.008006288043577604, "grad_norm": 8.64136165084598, "learning_rate": 2.6626047220106626e-06, "loss": 18.2856, "step": 438 }, { "epoch": 0.008024567240024129, "grad_norm": 11.422630324831495, "learning_rate": 2.6686976389946687e-06, "loss": 19.0644, "step": 439 }, { "epoch": 0.008042846436470652, "grad_norm": 8.816431365246043, "learning_rate": 2.6747905559786754e-06, "loss": 18.2401, "step": 440 }, { "epoch": 0.008061125632917178, "grad_norm": 8.278936924735184, "learning_rate": 2.680883472962681e-06, "loss": 17.7142, "step": 441 }, { "epoch": 0.008079404829363701, "grad_norm": 9.882950868408622, "learning_rate": 2.6869763899466873e-06, "loss": 18.6747, "step": 442 }, { "epoch": 0.008097684025810225, "grad_norm": 8.455266639438943, "learning_rate": 2.6930693069306935e-06, "loss": 18.5501, "step": 443 }, { "epoch": 0.00811596322225675, "grad_norm": 10.369712253033393, "learning_rate": 2.6991622239146992e-06, "loss": 18.9826, "step": 444 }, { "epoch": 0.008134242418703273, "grad_norm": 7.588267951842359, "learning_rate": 2.7052551408987054e-06, "loss": 17.7885, "step": 445 }, { "epoch": 0.008152521615149799, "grad_norm": 7.933435039710536, "learning_rate": 2.7113480578827116e-06, "loss": 17.9875, "step": 446 }, { "epoch": 0.008170800811596322, "grad_norm": 9.844989948105257, "learning_rate": 2.7174409748667174e-06, "loss": 18.874, "step": 447 }, { "epoch": 0.008189080008042847, "grad_norm": 7.816887348231905, "learning_rate": 2.7235338918507235e-06, "loss": 17.8547, "step": 448 }, { "epoch": 0.00820735920448937, "grad_norm": 8.319887922416477, "learning_rate": 2.72962680883473e-06, "loss": 18.1645, "step": 449 }, { "epoch": 0.008225638400935894, "grad_norm": 8.794004300642134, "learning_rate": 2.7357197258187355e-06, "loss": 18.6253, "step": 450 }, { "epoch": 0.00824391759738242, "grad_norm": 8.052292213418909, "learning_rate": 2.741812642802742e-06, "loss": 17.8093, "step": 451 }, { "epoch": 0.008262196793828943, "grad_norm": 9.269781357392727, "learning_rate": 2.7479055597867483e-06, "loss": 18.4396, "step": 452 }, { "epoch": 0.008280475990275468, "grad_norm": 8.652844062265205, "learning_rate": 2.7539984767707544e-06, "loss": 18.473, "step": 453 }, { "epoch": 0.008298755186721992, "grad_norm": 7.383249069054072, "learning_rate": 2.76009139375476e-06, "loss": 17.7205, "step": 454 }, { "epoch": 0.008317034383168515, "grad_norm": 8.18718452206247, "learning_rate": 2.7661843107387664e-06, "loss": 17.7939, "step": 455 }, { "epoch": 0.00833531357961504, "grad_norm": 10.176755743735992, "learning_rate": 2.7722772277227726e-06, "loss": 18.5029, "step": 456 }, { "epoch": 0.008353592776061564, "grad_norm": 9.711682630724553, "learning_rate": 2.7783701447067783e-06, "loss": 18.4737, "step": 457 }, { "epoch": 0.00837187197250809, "grad_norm": 9.317443047461476, "learning_rate": 2.784463061690785e-06, "loss": 18.1023, "step": 458 }, { "epoch": 0.008390151168954613, "grad_norm": 8.11083945397948, "learning_rate": 2.790555978674791e-06, "loss": 17.7362, "step": 459 }, { "epoch": 0.008408430365401136, "grad_norm": 7.3596780356905, "learning_rate": 2.796648895658797e-06, "loss": 18.0177, "step": 460 }, { "epoch": 0.008426709561847661, "grad_norm": 9.332385377216356, "learning_rate": 2.802741812642803e-06, "loss": 18.4037, "step": 461 }, { "epoch": 0.008444988758294185, "grad_norm": 8.070677476220393, "learning_rate": 2.8088347296268092e-06, "loss": 18.1327, "step": 462 }, { "epoch": 0.00846326795474071, "grad_norm": 10.043995385236958, "learning_rate": 2.814927646610815e-06, "loss": 18.7866, "step": 463 }, { "epoch": 0.008481547151187234, "grad_norm": 9.612765078676649, "learning_rate": 2.821020563594821e-06, "loss": 18.3719, "step": 464 }, { "epoch": 0.008499826347633759, "grad_norm": 9.096496728607754, "learning_rate": 2.8271134805788274e-06, "loss": 18.3143, "step": 465 }, { "epoch": 0.008518105544080282, "grad_norm": 8.650570824357551, "learning_rate": 2.833206397562833e-06, "loss": 18.2524, "step": 466 }, { "epoch": 0.008536384740526806, "grad_norm": 8.783777135279273, "learning_rate": 2.8392993145468393e-06, "loss": 18.1164, "step": 467 }, { "epoch": 0.008554663936973331, "grad_norm": 9.136119852739785, "learning_rate": 2.845392231530846e-06, "loss": 18.4177, "step": 468 }, { "epoch": 0.008572943133419855, "grad_norm": 6.913422302003662, "learning_rate": 2.8514851485148517e-06, "loss": 17.6912, "step": 469 }, { "epoch": 0.00859122232986638, "grad_norm": 9.128585392774562, "learning_rate": 2.857578065498858e-06, "loss": 18.6487, "step": 470 }, { "epoch": 0.008609501526312903, "grad_norm": 9.215913965517334, "learning_rate": 2.863670982482864e-06, "loss": 18.528, "step": 471 }, { "epoch": 0.008627780722759427, "grad_norm": 7.578818180282947, "learning_rate": 2.86976389946687e-06, "loss": 17.693, "step": 472 }, { "epoch": 0.008646059919205952, "grad_norm": 8.458790231416076, "learning_rate": 2.875856816450876e-06, "loss": 18.4237, "step": 473 }, { "epoch": 0.008664339115652476, "grad_norm": 8.536550233318124, "learning_rate": 2.881949733434882e-06, "loss": 18.6088, "step": 474 }, { "epoch": 0.008682618312099, "grad_norm": 8.39836290974198, "learning_rate": 2.8880426504188883e-06, "loss": 18.0771, "step": 475 }, { "epoch": 0.008700897508545524, "grad_norm": 8.847571763064265, "learning_rate": 2.894135567402894e-06, "loss": 18.2639, "step": 476 }, { "epoch": 0.008719176704992048, "grad_norm": 8.476705894783775, "learning_rate": 2.9002284843869007e-06, "loss": 17.8502, "step": 477 }, { "epoch": 0.008737455901438573, "grad_norm": 8.431699688669129, "learning_rate": 2.906321401370907e-06, "loss": 18.1192, "step": 478 }, { "epoch": 0.008755735097885096, "grad_norm": 10.200575097290223, "learning_rate": 2.9124143183549126e-06, "loss": 18.5815, "step": 479 }, { "epoch": 0.008774014294331622, "grad_norm": 8.227135289405737, "learning_rate": 2.918507235338919e-06, "loss": 18.035, "step": 480 }, { "epoch": 0.008792293490778145, "grad_norm": 9.188182347644483, "learning_rate": 2.924600152322925e-06, "loss": 18.5853, "step": 481 }, { "epoch": 0.00881057268722467, "grad_norm": 8.748967205379937, "learning_rate": 2.9306930693069308e-06, "loss": 18.3194, "step": 482 }, { "epoch": 0.008828851883671194, "grad_norm": 8.974929578637878, "learning_rate": 2.936785986290937e-06, "loss": 18.4196, "step": 483 }, { "epoch": 0.008847131080117717, "grad_norm": 8.726187193187672, "learning_rate": 2.942878903274943e-06, "loss": 18.5871, "step": 484 }, { "epoch": 0.008865410276564243, "grad_norm": 8.242732808561213, "learning_rate": 2.948971820258949e-06, "loss": 18.1831, "step": 485 }, { "epoch": 0.008883689473010766, "grad_norm": 8.258898171491985, "learning_rate": 2.9550647372429555e-06, "loss": 18.0755, "step": 486 }, { "epoch": 0.008901968669457291, "grad_norm": 8.393333887873215, "learning_rate": 2.9611576542269617e-06, "loss": 18.3118, "step": 487 }, { "epoch": 0.008920247865903815, "grad_norm": 8.683881824599178, "learning_rate": 2.9672505712109674e-06, "loss": 18.2373, "step": 488 }, { "epoch": 0.008938527062350338, "grad_norm": 10.074471771430872, "learning_rate": 2.9733434881949736e-06, "loss": 18.8629, "step": 489 }, { "epoch": 0.008956806258796864, "grad_norm": 9.057481759135253, "learning_rate": 2.97943640517898e-06, "loss": 18.1558, "step": 490 }, { "epoch": 0.008975085455243387, "grad_norm": 8.608543843136571, "learning_rate": 2.9855293221629856e-06, "loss": 18.1241, "step": 491 }, { "epoch": 0.008993364651689912, "grad_norm": 8.893306631487548, "learning_rate": 2.9916222391469917e-06, "loss": 18.4825, "step": 492 }, { "epoch": 0.009011643848136436, "grad_norm": 8.26756330038832, "learning_rate": 2.997715156130998e-06, "loss": 18.3376, "step": 493 }, { "epoch": 0.00902992304458296, "grad_norm": 10.071683744373866, "learning_rate": 3.0038080731150045e-06, "loss": 18.6427, "step": 494 }, { "epoch": 0.009048202241029485, "grad_norm": 7.610212877203644, "learning_rate": 3.00990099009901e-06, "loss": 17.6488, "step": 495 }, { "epoch": 0.009066481437476008, "grad_norm": 8.866784008361785, "learning_rate": 3.0159939070830165e-06, "loss": 18.5203, "step": 496 }, { "epoch": 0.009084760633922533, "grad_norm": 8.267221539992438, "learning_rate": 3.0220868240670226e-06, "loss": 18.162, "step": 497 }, { "epoch": 0.009103039830369057, "grad_norm": 9.833899972727973, "learning_rate": 3.0281797410510284e-06, "loss": 18.4136, "step": 498 }, { "epoch": 0.009121319026815582, "grad_norm": 8.598654687238412, "learning_rate": 3.0342726580350346e-06, "loss": 18.1908, "step": 499 }, { "epoch": 0.009139598223262105, "grad_norm": 8.83631830602541, "learning_rate": 3.0403655750190408e-06, "loss": 18.255, "step": 500 }, { "epoch": 0.009157877419708629, "grad_norm": 8.967581726984752, "learning_rate": 3.0464584920030465e-06, "loss": 18.3796, "step": 501 }, { "epoch": 0.009176156616155154, "grad_norm": 8.391512538029074, "learning_rate": 3.0525514089870527e-06, "loss": 18.1908, "step": 502 }, { "epoch": 0.009194435812601678, "grad_norm": 8.489402237604713, "learning_rate": 3.058644325971059e-06, "loss": 18.0274, "step": 503 }, { "epoch": 0.009212715009048203, "grad_norm": 9.185868381210456, "learning_rate": 3.0647372429550646e-06, "loss": 18.7543, "step": 504 }, { "epoch": 0.009230994205494726, "grad_norm": 8.64799578928608, "learning_rate": 3.0708301599390713e-06, "loss": 18.178, "step": 505 }, { "epoch": 0.00924927340194125, "grad_norm": 8.024152009533692, "learning_rate": 3.0769230769230774e-06, "loss": 17.8823, "step": 506 }, { "epoch": 0.009267552598387775, "grad_norm": 8.913452620302673, "learning_rate": 3.083015993907083e-06, "loss": 18.2871, "step": 507 }, { "epoch": 0.009285831794834299, "grad_norm": 8.739662477117557, "learning_rate": 3.0891089108910894e-06, "loss": 17.8769, "step": 508 }, { "epoch": 0.009304110991280824, "grad_norm": 8.00629871672884, "learning_rate": 3.0952018278750956e-06, "loss": 18.0679, "step": 509 }, { "epoch": 0.009322390187727347, "grad_norm": 9.412734562267573, "learning_rate": 3.1012947448591013e-06, "loss": 18.6921, "step": 510 }, { "epoch": 0.009340669384173871, "grad_norm": 7.921780414150499, "learning_rate": 3.1073876618431075e-06, "loss": 17.9678, "step": 511 }, { "epoch": 0.009358948580620396, "grad_norm": 7.302084134718715, "learning_rate": 3.1134805788271137e-06, "loss": 17.5462, "step": 512 }, { "epoch": 0.00937722777706692, "grad_norm": 10.521947594287234, "learning_rate": 3.1195734958111194e-06, "loss": 19.0029, "step": 513 }, { "epoch": 0.009395506973513445, "grad_norm": 8.848198510870068, "learning_rate": 3.125666412795126e-06, "loss": 18.1732, "step": 514 }, { "epoch": 0.009413786169959968, "grad_norm": 7.693917315803662, "learning_rate": 3.1317593297791322e-06, "loss": 17.8773, "step": 515 }, { "epoch": 0.009432065366406494, "grad_norm": 10.244409066966945, "learning_rate": 3.1378522467631384e-06, "loss": 18.6571, "step": 516 }, { "epoch": 0.009450344562853017, "grad_norm": 7.874769630024627, "learning_rate": 3.143945163747144e-06, "loss": 17.8227, "step": 517 }, { "epoch": 0.00946862375929954, "grad_norm": 7.241900829546251, "learning_rate": 3.1500380807311503e-06, "loss": 17.5432, "step": 518 }, { "epoch": 0.009486902955746066, "grad_norm": 8.907781822473183, "learning_rate": 3.1561309977151565e-06, "loss": 18.0927, "step": 519 }, { "epoch": 0.00950518215219259, "grad_norm": 8.46348947828814, "learning_rate": 3.1622239146991623e-06, "loss": 18.3927, "step": 520 }, { "epoch": 0.009523461348639115, "grad_norm": 9.445588967967891, "learning_rate": 3.1683168316831685e-06, "loss": 18.5475, "step": 521 }, { "epoch": 0.009541740545085638, "grad_norm": 8.61062653825342, "learning_rate": 3.174409748667175e-06, "loss": 18.1747, "step": 522 }, { "epoch": 0.009560019741532162, "grad_norm": 8.633999487272066, "learning_rate": 3.1805026656511804e-06, "loss": 18.1129, "step": 523 }, { "epoch": 0.009578298937978687, "grad_norm": 8.65307697819484, "learning_rate": 3.186595582635187e-06, "loss": 18.0431, "step": 524 }, { "epoch": 0.00959657813442521, "grad_norm": 9.227500699335675, "learning_rate": 3.192688499619193e-06, "loss": 18.4762, "step": 525 }, { "epoch": 0.009614857330871735, "grad_norm": 8.172972022293687, "learning_rate": 3.198781416603199e-06, "loss": 18.3787, "step": 526 }, { "epoch": 0.009633136527318259, "grad_norm": 7.785109284311084, "learning_rate": 3.204874333587205e-06, "loss": 17.7882, "step": 527 }, { "epoch": 0.009651415723764782, "grad_norm": 9.420314791564312, "learning_rate": 3.2109672505712113e-06, "loss": 18.5747, "step": 528 }, { "epoch": 0.009669694920211308, "grad_norm": 9.244874151341696, "learning_rate": 3.217060167555217e-06, "loss": 18.0547, "step": 529 }, { "epoch": 0.009687974116657831, "grad_norm": 8.311126105224647, "learning_rate": 3.2231530845392233e-06, "loss": 18.0579, "step": 530 }, { "epoch": 0.009706253313104356, "grad_norm": 8.997860459062794, "learning_rate": 3.2292460015232294e-06, "loss": 18.2353, "step": 531 }, { "epoch": 0.00972453250955088, "grad_norm": 8.59961107594842, "learning_rate": 3.235338918507235e-06, "loss": 18.2798, "step": 532 }, { "epoch": 0.009742811705997405, "grad_norm": 9.227733614804333, "learning_rate": 3.241431835491242e-06, "loss": 18.3154, "step": 533 }, { "epoch": 0.009761090902443929, "grad_norm": 8.635515736231104, "learning_rate": 3.247524752475248e-06, "loss": 18.5925, "step": 534 }, { "epoch": 0.009779370098890452, "grad_norm": 8.32479719423018, "learning_rate": 3.2536176694592537e-06, "loss": 17.6423, "step": 535 }, { "epoch": 0.009797649295336977, "grad_norm": 8.790696290335909, "learning_rate": 3.25971058644326e-06, "loss": 18.4782, "step": 536 }, { "epoch": 0.009815928491783501, "grad_norm": 9.479560069100044, "learning_rate": 3.265803503427266e-06, "loss": 18.1152, "step": 537 }, { "epoch": 0.009834207688230026, "grad_norm": 8.192728144832243, "learning_rate": 3.2718964204112723e-06, "loss": 18.0095, "step": 538 }, { "epoch": 0.00985248688467655, "grad_norm": 7.876454527850719, "learning_rate": 3.277989337395278e-06, "loss": 17.8086, "step": 539 }, { "epoch": 0.009870766081123073, "grad_norm": 8.067141115388166, "learning_rate": 3.2840822543792842e-06, "loss": 18.0498, "step": 540 }, { "epoch": 0.009889045277569598, "grad_norm": 9.316743951006133, "learning_rate": 3.290175171363291e-06, "loss": 18.3959, "step": 541 }, { "epoch": 0.009907324474016122, "grad_norm": 8.47902828845691, "learning_rate": 3.2962680883472966e-06, "loss": 18.0238, "step": 542 }, { "epoch": 0.009925603670462647, "grad_norm": 8.856677568217423, "learning_rate": 3.3023610053313028e-06, "loss": 18.2624, "step": 543 }, { "epoch": 0.00994388286690917, "grad_norm": 8.443474590903095, "learning_rate": 3.308453922315309e-06, "loss": 18.1976, "step": 544 }, { "epoch": 0.009962162063355694, "grad_norm": 9.36612442369136, "learning_rate": 3.3145468392993147e-06, "loss": 18.8425, "step": 545 }, { "epoch": 0.00998044125980222, "grad_norm": 7.064347581584799, "learning_rate": 3.320639756283321e-06, "loss": 17.4856, "step": 546 }, { "epoch": 0.009998720456248743, "grad_norm": 8.9669000570803, "learning_rate": 3.326732673267327e-06, "loss": 17.9647, "step": 547 }, { "epoch": 0.010016999652695268, "grad_norm": 8.66512459966387, "learning_rate": 3.332825590251333e-06, "loss": 18.1868, "step": 548 }, { "epoch": 0.010035278849141792, "grad_norm": 8.010190288017151, "learning_rate": 3.338918507235339e-06, "loss": 18.1856, "step": 549 }, { "epoch": 0.010053558045588317, "grad_norm": 7.387120482691083, "learning_rate": 3.3450114242193456e-06, "loss": 17.6946, "step": 550 }, { "epoch": 0.01007183724203484, "grad_norm": 8.32475615889767, "learning_rate": 3.351104341203351e-06, "loss": 17.911, "step": 551 }, { "epoch": 0.010090116438481364, "grad_norm": 9.066205394885595, "learning_rate": 3.3571972581873576e-06, "loss": 18.9419, "step": 552 }, { "epoch": 0.010108395634927889, "grad_norm": 7.968193698210883, "learning_rate": 3.3632901751713638e-06, "loss": 18.0055, "step": 553 }, { "epoch": 0.010126674831374412, "grad_norm": 8.571786943624106, "learning_rate": 3.3693830921553695e-06, "loss": 18.0664, "step": 554 }, { "epoch": 0.010144954027820938, "grad_norm": 7.699019935806702, "learning_rate": 3.3754760091393757e-06, "loss": 18.0581, "step": 555 }, { "epoch": 0.010163233224267461, "grad_norm": 8.067866766547853, "learning_rate": 3.381568926123382e-06, "loss": 18.2582, "step": 556 }, { "epoch": 0.010181512420713985, "grad_norm": 9.433101264824572, "learning_rate": 3.387661843107388e-06, "loss": 18.5321, "step": 557 }, { "epoch": 0.01019979161716051, "grad_norm": 8.540100236577365, "learning_rate": 3.393754760091394e-06, "loss": 18.1843, "step": 558 }, { "epoch": 0.010218070813607033, "grad_norm": 9.476723770747716, "learning_rate": 3.3998476770754e-06, "loss": 18.5148, "step": 559 }, { "epoch": 0.010236350010053559, "grad_norm": 7.916222040531197, "learning_rate": 3.4059405940594066e-06, "loss": 17.8445, "step": 560 }, { "epoch": 0.010254629206500082, "grad_norm": 8.660451835322666, "learning_rate": 3.4120335110434124e-06, "loss": 18.2338, "step": 561 }, { "epoch": 0.010272908402946606, "grad_norm": 8.200092029419919, "learning_rate": 3.4181264280274185e-06, "loss": 17.8906, "step": 562 }, { "epoch": 0.01029118759939313, "grad_norm": 7.665208685321601, "learning_rate": 3.4242193450114247e-06, "loss": 18.0897, "step": 563 }, { "epoch": 0.010309466795839654, "grad_norm": 8.377511704569045, "learning_rate": 3.4303122619954305e-06, "loss": 18.3939, "step": 564 }, { "epoch": 0.01032774599228618, "grad_norm": 8.14051934165229, "learning_rate": 3.4364051789794367e-06, "loss": 18.0354, "step": 565 }, { "epoch": 0.010346025188732703, "grad_norm": 8.893467288989342, "learning_rate": 3.442498095963443e-06, "loss": 18.9523, "step": 566 }, { "epoch": 0.010364304385179228, "grad_norm": 13.06411652942076, "learning_rate": 3.4485910129474486e-06, "loss": 18.6884, "step": 567 }, { "epoch": 0.010382583581625752, "grad_norm": 8.159390916538014, "learning_rate": 3.454683929931455e-06, "loss": 17.9738, "step": 568 }, { "epoch": 0.010400862778072275, "grad_norm": 8.901397762873613, "learning_rate": 3.4607768469154614e-06, "loss": 18.5212, "step": 569 }, { "epoch": 0.0104191419745188, "grad_norm": 8.263202136880551, "learning_rate": 3.466869763899467e-06, "loss": 18.1432, "step": 570 }, { "epoch": 0.010437421170965324, "grad_norm": 8.642791941923635, "learning_rate": 3.4729626808834733e-06, "loss": 18.1083, "step": 571 }, { "epoch": 0.01045570036741185, "grad_norm": 9.079489731525967, "learning_rate": 3.4790555978674795e-06, "loss": 18.4298, "step": 572 }, { "epoch": 0.010473979563858373, "grad_norm": 8.28360939638146, "learning_rate": 3.4851485148514853e-06, "loss": 17.9617, "step": 573 }, { "epoch": 0.010492258760304896, "grad_norm": 8.334940108308801, "learning_rate": 3.4912414318354915e-06, "loss": 18.3559, "step": 574 }, { "epoch": 0.010510537956751421, "grad_norm": 7.857994330225646, "learning_rate": 3.4973343488194976e-06, "loss": 18.1056, "step": 575 }, { "epoch": 0.010528817153197945, "grad_norm": 8.330149196467916, "learning_rate": 3.5034272658035034e-06, "loss": 18.0329, "step": 576 }, { "epoch": 0.01054709634964447, "grad_norm": 8.661868500547321, "learning_rate": 3.5095201827875096e-06, "loss": 18.4591, "step": 577 }, { "epoch": 0.010565375546090994, "grad_norm": 8.163560354563982, "learning_rate": 3.515613099771516e-06, "loss": 18.0669, "step": 578 }, { "epoch": 0.010583654742537517, "grad_norm": 8.242693534936947, "learning_rate": 3.5217060167555224e-06, "loss": 18.1963, "step": 579 }, { "epoch": 0.010601933938984042, "grad_norm": 9.176823530741894, "learning_rate": 3.527798933739528e-06, "loss": 18.4939, "step": 580 }, { "epoch": 0.010620213135430566, "grad_norm": 8.788621060069486, "learning_rate": 3.5338918507235343e-06, "loss": 18.2809, "step": 581 }, { "epoch": 0.010638492331877091, "grad_norm": 8.090462283188902, "learning_rate": 3.5399847677075405e-06, "loss": 18.0944, "step": 582 }, { "epoch": 0.010656771528323615, "grad_norm": 8.596137634283256, "learning_rate": 3.5460776846915462e-06, "loss": 18.278, "step": 583 }, { "epoch": 0.01067505072477014, "grad_norm": 8.247695085227974, "learning_rate": 3.5521706016755524e-06, "loss": 18.1307, "step": 584 }, { "epoch": 0.010693329921216663, "grad_norm": 8.127347406336867, "learning_rate": 3.5582635186595586e-06, "loss": 18.1265, "step": 585 }, { "epoch": 0.010711609117663187, "grad_norm": 8.371783660452177, "learning_rate": 3.5643564356435644e-06, "loss": 18.1125, "step": 586 }, { "epoch": 0.010729888314109712, "grad_norm": 8.99520294894812, "learning_rate": 3.5704493526275706e-06, "loss": 18.2121, "step": 587 }, { "epoch": 0.010748167510556236, "grad_norm": 10.608664862244305, "learning_rate": 3.576542269611577e-06, "loss": 18.846, "step": 588 }, { "epoch": 0.01076644670700276, "grad_norm": 8.968867264754163, "learning_rate": 3.582635186595583e-06, "loss": 18.4245, "step": 589 }, { "epoch": 0.010784725903449284, "grad_norm": 7.8113289740846925, "learning_rate": 3.588728103579589e-06, "loss": 17.9216, "step": 590 }, { "epoch": 0.010803005099895808, "grad_norm": 8.547150312484515, "learning_rate": 3.5948210205635953e-06, "loss": 18.164, "step": 591 }, { "epoch": 0.010821284296342333, "grad_norm": 8.034774327385934, "learning_rate": 3.600913937547601e-06, "loss": 18.1415, "step": 592 }, { "epoch": 0.010839563492788857, "grad_norm": 8.482926628353267, "learning_rate": 3.6070068545316072e-06, "loss": 17.8454, "step": 593 }, { "epoch": 0.010857842689235382, "grad_norm": 7.159187145580739, "learning_rate": 3.6130997715156134e-06, "loss": 17.5982, "step": 594 }, { "epoch": 0.010876121885681905, "grad_norm": 6.956524183644067, "learning_rate": 3.619192688499619e-06, "loss": 17.7585, "step": 595 }, { "epoch": 0.010894401082128429, "grad_norm": 8.289400186115694, "learning_rate": 3.6252856054836253e-06, "loss": 18.2065, "step": 596 }, { "epoch": 0.010912680278574954, "grad_norm": 8.926577429484452, "learning_rate": 3.631378522467632e-06, "loss": 18.0875, "step": 597 }, { "epoch": 0.010930959475021478, "grad_norm": 9.22683538460072, "learning_rate": 3.6374714394516377e-06, "loss": 18.6423, "step": 598 }, { "epoch": 0.010949238671468003, "grad_norm": 8.270948741163098, "learning_rate": 3.643564356435644e-06, "loss": 17.7624, "step": 599 }, { "epoch": 0.010967517867914526, "grad_norm": 10.822683093286273, "learning_rate": 3.64965727341965e-06, "loss": 19.2622, "step": 600 }, { "epoch": 0.010985797064361051, "grad_norm": 8.958097787886413, "learning_rate": 3.6557501904036563e-06, "loss": 18.1253, "step": 601 }, { "epoch": 0.011004076260807575, "grad_norm": 7.719258571985554, "learning_rate": 3.661843107387662e-06, "loss": 17.9526, "step": 602 }, { "epoch": 0.011022355457254098, "grad_norm": 8.450593543919977, "learning_rate": 3.667936024371668e-06, "loss": 18.0805, "step": 603 }, { "epoch": 0.011040634653700624, "grad_norm": 8.90466662740431, "learning_rate": 3.6740289413556744e-06, "loss": 18.538, "step": 604 }, { "epoch": 0.011058913850147147, "grad_norm": 8.414490229419439, "learning_rate": 3.68012185833968e-06, "loss": 18.5623, "step": 605 }, { "epoch": 0.011077193046593672, "grad_norm": 7.692666688497806, "learning_rate": 3.6862147753236867e-06, "loss": 18.0062, "step": 606 }, { "epoch": 0.011095472243040196, "grad_norm": 8.293027250646789, "learning_rate": 3.692307692307693e-06, "loss": 17.9804, "step": 607 }, { "epoch": 0.01111375143948672, "grad_norm": 8.690172577012346, "learning_rate": 3.6984006092916987e-06, "loss": 18.4833, "step": 608 }, { "epoch": 0.011132030635933245, "grad_norm": 8.655996628896485, "learning_rate": 3.704493526275705e-06, "loss": 17.9411, "step": 609 }, { "epoch": 0.011150309832379768, "grad_norm": 8.91574917849141, "learning_rate": 3.710586443259711e-06, "loss": 18.256, "step": 610 }, { "epoch": 0.011168589028826293, "grad_norm": 7.01065365822396, "learning_rate": 3.716679360243717e-06, "loss": 17.572, "step": 611 }, { "epoch": 0.011186868225272817, "grad_norm": 9.289538437867213, "learning_rate": 3.722772277227723e-06, "loss": 18.6452, "step": 612 }, { "epoch": 0.01120514742171934, "grad_norm": 8.308113412260557, "learning_rate": 3.728865194211729e-06, "loss": 18.0685, "step": 613 }, { "epoch": 0.011223426618165866, "grad_norm": 7.931418707584115, "learning_rate": 3.734958111195735e-06, "loss": 18.0801, "step": 614 }, { "epoch": 0.011241705814612389, "grad_norm": 8.886288471507592, "learning_rate": 3.741051028179741e-06, "loss": 18.1922, "step": 615 }, { "epoch": 0.011259985011058914, "grad_norm": 7.177341956825686, "learning_rate": 3.7471439451637477e-06, "loss": 17.8097, "step": 616 }, { "epoch": 0.011278264207505438, "grad_norm": 8.904115254334988, "learning_rate": 3.7532368621477535e-06, "loss": 18.1632, "step": 617 }, { "epoch": 0.011296543403951963, "grad_norm": 8.775998773629942, "learning_rate": 3.7593297791317597e-06, "loss": 18.4946, "step": 618 }, { "epoch": 0.011314822600398487, "grad_norm": 7.860619604169376, "learning_rate": 3.765422696115766e-06, "loss": 17.8058, "step": 619 }, { "epoch": 0.01133310179684501, "grad_norm": 9.143618681610725, "learning_rate": 3.771515613099772e-06, "loss": 18.5067, "step": 620 }, { "epoch": 0.011351380993291535, "grad_norm": 7.641110873624864, "learning_rate": 3.7776085300837778e-06, "loss": 18.0647, "step": 621 }, { "epoch": 0.011369660189738059, "grad_norm": 9.881622540341965, "learning_rate": 3.783701447067784e-06, "loss": 18.7808, "step": 622 }, { "epoch": 0.011387939386184584, "grad_norm": 9.079147581664522, "learning_rate": 3.7897943640517906e-06, "loss": 18.6017, "step": 623 }, { "epoch": 0.011406218582631108, "grad_norm": 9.919861848991172, "learning_rate": 3.795887281035796e-06, "loss": 18.5653, "step": 624 }, { "epoch": 0.011424497779077631, "grad_norm": 8.772754960101011, "learning_rate": 3.8019801980198025e-06, "loss": 18.4951, "step": 625 }, { "epoch": 0.011442776975524156, "grad_norm": 9.245420728499028, "learning_rate": 3.8080731150038087e-06, "loss": 18.5503, "step": 626 }, { "epoch": 0.01146105617197068, "grad_norm": 7.1149859102439015, "learning_rate": 3.8141660319878144e-06, "loss": 17.7896, "step": 627 }, { "epoch": 0.011479335368417205, "grad_norm": 8.922405789331233, "learning_rate": 3.820258948971821e-06, "loss": 18.1187, "step": 628 }, { "epoch": 0.011497614564863728, "grad_norm": 9.334662108517135, "learning_rate": 3.826351865955827e-06, "loss": 18.4043, "step": 629 }, { "epoch": 0.011515893761310252, "grad_norm": 7.991348921154775, "learning_rate": 3.832444782939833e-06, "loss": 17.9648, "step": 630 }, { "epoch": 0.011534172957756777, "grad_norm": 10.35442384336909, "learning_rate": 3.838537699923839e-06, "loss": 18.8284, "step": 631 }, { "epoch": 0.0115524521542033, "grad_norm": 7.6281038550182485, "learning_rate": 3.844630616907845e-06, "loss": 17.7663, "step": 632 }, { "epoch": 0.011570731350649826, "grad_norm": 9.003867909251422, "learning_rate": 3.850723533891851e-06, "loss": 18.4097, "step": 633 }, { "epoch": 0.01158901054709635, "grad_norm": 9.353285303990113, "learning_rate": 3.856816450875857e-06, "loss": 18.3862, "step": 634 }, { "epoch": 0.011607289743542875, "grad_norm": 7.913205735721484, "learning_rate": 3.8629093678598635e-06, "loss": 17.7015, "step": 635 }, { "epoch": 0.011625568939989398, "grad_norm": 7.850792293732742, "learning_rate": 3.869002284843869e-06, "loss": 17.7488, "step": 636 }, { "epoch": 0.011643848136435922, "grad_norm": 8.565520597603587, "learning_rate": 3.875095201827875e-06, "loss": 18.0983, "step": 637 }, { "epoch": 0.011662127332882447, "grad_norm": 9.956027586916676, "learning_rate": 3.881188118811882e-06, "loss": 18.7461, "step": 638 }, { "epoch": 0.01168040652932897, "grad_norm": 10.054966931273652, "learning_rate": 3.887281035795887e-06, "loss": 18.8291, "step": 639 }, { "epoch": 0.011698685725775496, "grad_norm": 9.251972467038426, "learning_rate": 3.893373952779894e-06, "loss": 18.4111, "step": 640 }, { "epoch": 0.011716964922222019, "grad_norm": 8.358312202277967, "learning_rate": 3.8994668697639e-06, "loss": 18.0409, "step": 641 }, { "epoch": 0.011735244118668543, "grad_norm": 8.422320418791143, "learning_rate": 3.905559786747906e-06, "loss": 18.1452, "step": 642 }, { "epoch": 0.011753523315115068, "grad_norm": 7.671429243467571, "learning_rate": 3.911652703731912e-06, "loss": 18.0494, "step": 643 }, { "epoch": 0.011771802511561591, "grad_norm": 8.50051421134067, "learning_rate": 3.917745620715918e-06, "loss": 18.4015, "step": 644 }, { "epoch": 0.011790081708008117, "grad_norm": 7.760926670958736, "learning_rate": 3.9238385376999244e-06, "loss": 17.921, "step": 645 }, { "epoch": 0.01180836090445464, "grad_norm": 11.513527376413613, "learning_rate": 3.92993145468393e-06, "loss": 18.1754, "step": 646 }, { "epoch": 0.011826640100901164, "grad_norm": 8.028483144791913, "learning_rate": 3.936024371667936e-06, "loss": 18.1008, "step": 647 }, { "epoch": 0.011844919297347689, "grad_norm": 8.918002922760337, "learning_rate": 3.9421172886519426e-06, "loss": 18.5311, "step": 648 }, { "epoch": 0.011863198493794212, "grad_norm": 8.4695793378545, "learning_rate": 3.948210205635948e-06, "loss": 18.1985, "step": 649 }, { "epoch": 0.011881477690240737, "grad_norm": 8.294055827860042, "learning_rate": 3.954303122619955e-06, "loss": 18.1201, "step": 650 }, { "epoch": 0.011899756886687261, "grad_norm": 9.017151892282369, "learning_rate": 3.960396039603961e-06, "loss": 18.646, "step": 651 }, { "epoch": 0.011918036083133786, "grad_norm": 9.311975409243328, "learning_rate": 3.9664889565879665e-06, "loss": 18.3601, "step": 652 }, { "epoch": 0.01193631527958031, "grad_norm": 9.28881290790521, "learning_rate": 3.972581873571973e-06, "loss": 18.3552, "step": 653 }, { "epoch": 0.011954594476026833, "grad_norm": 8.911077660865459, "learning_rate": 3.978674790555979e-06, "loss": 18.6692, "step": 654 }, { "epoch": 0.011972873672473358, "grad_norm": 8.750159759354592, "learning_rate": 3.9847677075399846e-06, "loss": 17.8843, "step": 655 }, { "epoch": 0.011991152868919882, "grad_norm": 7.5063771211308845, "learning_rate": 3.990860624523991e-06, "loss": 17.684, "step": 656 }, { "epoch": 0.012009432065366407, "grad_norm": 9.058283847455503, "learning_rate": 3.996953541507998e-06, "loss": 18.5061, "step": 657 }, { "epoch": 0.01202771126181293, "grad_norm": 10.043624776829995, "learning_rate": 4.0030464584920035e-06, "loss": 18.1349, "step": 658 }, { "epoch": 0.012045990458259454, "grad_norm": 8.24023318173618, "learning_rate": 4.009139375476009e-06, "loss": 18.1363, "step": 659 }, { "epoch": 0.01206426965470598, "grad_norm": 9.524182484965243, "learning_rate": 4.015232292460016e-06, "loss": 18.3919, "step": 660 }, { "epoch": 0.012082548851152503, "grad_norm": 8.654553969481384, "learning_rate": 4.021325209444022e-06, "loss": 17.8421, "step": 661 }, { "epoch": 0.012100828047599028, "grad_norm": 8.413561700429844, "learning_rate": 4.0274181264280274e-06, "loss": 17.94, "step": 662 }, { "epoch": 0.012119107244045552, "grad_norm": 8.42006127552643, "learning_rate": 4.033511043412034e-06, "loss": 18.0531, "step": 663 }, { "epoch": 0.012137386440492075, "grad_norm": 8.256668684070519, "learning_rate": 4.03960396039604e-06, "loss": 17.8619, "step": 664 }, { "epoch": 0.0121556656369386, "grad_norm": 7.924822317596749, "learning_rate": 4.0456968773800455e-06, "loss": 18.0097, "step": 665 }, { "epoch": 0.012173944833385124, "grad_norm": 8.47007375308315, "learning_rate": 4.051789794364052e-06, "loss": 18.0684, "step": 666 }, { "epoch": 0.012192224029831649, "grad_norm": 9.597647355318228, "learning_rate": 4.057882711348059e-06, "loss": 18.5228, "step": 667 }, { "epoch": 0.012210503226278173, "grad_norm": 8.081301569529527, "learning_rate": 4.0639756283320645e-06, "loss": 18.029, "step": 668 }, { "epoch": 0.012228782422724698, "grad_norm": 8.09801481857068, "learning_rate": 4.07006854531607e-06, "loss": 17.9184, "step": 669 }, { "epoch": 0.012247061619171221, "grad_norm": 7.773602705447997, "learning_rate": 4.076161462300077e-06, "loss": 17.8822, "step": 670 }, { "epoch": 0.012265340815617745, "grad_norm": 9.38410411202206, "learning_rate": 4.082254379284083e-06, "loss": 18.2827, "step": 671 }, { "epoch": 0.01228362001206427, "grad_norm": 9.93505517562674, "learning_rate": 4.088347296268088e-06, "loss": 18.6631, "step": 672 }, { "epoch": 0.012301899208510794, "grad_norm": 10.076632024957327, "learning_rate": 4.094440213252095e-06, "loss": 18.8522, "step": 673 }, { "epoch": 0.012320178404957319, "grad_norm": 9.590342148059808, "learning_rate": 4.100533130236101e-06, "loss": 18.4533, "step": 674 }, { "epoch": 0.012338457601403842, "grad_norm": 8.648378057532792, "learning_rate": 4.1066260472201065e-06, "loss": 18.3743, "step": 675 }, { "epoch": 0.012356736797850366, "grad_norm": 9.225214321976841, "learning_rate": 4.112718964204113e-06, "loss": 18.5045, "step": 676 }, { "epoch": 0.012375015994296891, "grad_norm": 9.641471319011561, "learning_rate": 4.118811881188119e-06, "loss": 18.5693, "step": 677 }, { "epoch": 0.012393295190743414, "grad_norm": 8.71795174350895, "learning_rate": 4.1249047981721255e-06, "loss": 18.3874, "step": 678 }, { "epoch": 0.01241157438718994, "grad_norm": 8.631448803632992, "learning_rate": 4.130997715156131e-06, "loss": 18.1557, "step": 679 }, { "epoch": 0.012429853583636463, "grad_norm": 8.282374061730781, "learning_rate": 4.137090632140137e-06, "loss": 18.2244, "step": 680 }, { "epoch": 0.012448132780082987, "grad_norm": 7.363293065006019, "learning_rate": 4.143183549124144e-06, "loss": 17.7314, "step": 681 }, { "epoch": 0.012466411976529512, "grad_norm": 10.797530506423593, "learning_rate": 4.149276466108149e-06, "loss": 18.9832, "step": 682 }, { "epoch": 0.012484691172976035, "grad_norm": 7.879375084137035, "learning_rate": 4.155369383092156e-06, "loss": 18.1933, "step": 683 }, { "epoch": 0.01250297036942256, "grad_norm": 8.588918231392771, "learning_rate": 4.161462300076162e-06, "loss": 18.3093, "step": 684 }, { "epoch": 0.012521249565869084, "grad_norm": 8.136157675867892, "learning_rate": 4.167555217060168e-06, "loss": 17.8996, "step": 685 }, { "epoch": 0.01253952876231561, "grad_norm": 8.244391120869468, "learning_rate": 4.173648134044174e-06, "loss": 18.3063, "step": 686 }, { "epoch": 0.012557807958762133, "grad_norm": 9.254053025083964, "learning_rate": 4.17974105102818e-06, "loss": 18.552, "step": 687 }, { "epoch": 0.012576087155208656, "grad_norm": 8.510600321405521, "learning_rate": 4.1858339680121865e-06, "loss": 18.4846, "step": 688 }, { "epoch": 0.012594366351655182, "grad_norm": 8.01458664798195, "learning_rate": 4.191926884996192e-06, "loss": 17.715, "step": 689 }, { "epoch": 0.012612645548101705, "grad_norm": 8.626646670224346, "learning_rate": 4.198019801980198e-06, "loss": 18.2243, "step": 690 }, { "epoch": 0.01263092474454823, "grad_norm": 8.939771947894256, "learning_rate": 4.204112718964205e-06, "loss": 18.5696, "step": 691 }, { "epoch": 0.012649203940994754, "grad_norm": 8.120519986053191, "learning_rate": 4.21020563594821e-06, "loss": 17.9166, "step": 692 }, { "epoch": 0.012667483137441277, "grad_norm": 8.029242137098644, "learning_rate": 4.216298552932216e-06, "loss": 18.2364, "step": 693 }, { "epoch": 0.012685762333887803, "grad_norm": 8.858387502251698, "learning_rate": 4.222391469916223e-06, "loss": 18.4259, "step": 694 }, { "epoch": 0.012704041530334326, "grad_norm": 7.660860311701185, "learning_rate": 4.228484386900229e-06, "loss": 17.7606, "step": 695 }, { "epoch": 0.012722320726780851, "grad_norm": 9.906135660762699, "learning_rate": 4.234577303884235e-06, "loss": 18.7554, "step": 696 }, { "epoch": 0.012740599923227375, "grad_norm": 9.840851089802282, "learning_rate": 4.240670220868241e-06, "loss": 18.6306, "step": 697 }, { "epoch": 0.012758879119673898, "grad_norm": 8.604357795575616, "learning_rate": 4.2467631378522474e-06, "loss": 18.024, "step": 698 }, { "epoch": 0.012777158316120424, "grad_norm": 9.356382353257045, "learning_rate": 4.252856054836253e-06, "loss": 18.5935, "step": 699 }, { "epoch": 0.012795437512566947, "grad_norm": 9.281401062431087, "learning_rate": 4.258948971820259e-06, "loss": 18.2712, "step": 700 }, { "epoch": 0.012813716709013472, "grad_norm": 9.449169354332904, "learning_rate": 4.2650418888042656e-06, "loss": 18.5111, "step": 701 }, { "epoch": 0.012831995905459996, "grad_norm": 6.581590647391288, "learning_rate": 4.271134805788271e-06, "loss": 17.2692, "step": 702 }, { "epoch": 0.012850275101906521, "grad_norm": 8.941852584792075, "learning_rate": 4.277227722772277e-06, "loss": 18.1318, "step": 703 }, { "epoch": 0.012868554298353044, "grad_norm": 11.52066482368107, "learning_rate": 4.283320639756284e-06, "loss": 17.9786, "step": 704 }, { "epoch": 0.012886833494799568, "grad_norm": 9.50710199009215, "learning_rate": 4.28941355674029e-06, "loss": 18.4536, "step": 705 }, { "epoch": 0.012905112691246093, "grad_norm": 6.858864428444414, "learning_rate": 4.295506473724296e-06, "loss": 17.5341, "step": 706 }, { "epoch": 0.012923391887692617, "grad_norm": 8.694385305192165, "learning_rate": 4.301599390708302e-06, "loss": 18.3906, "step": 707 }, { "epoch": 0.012941671084139142, "grad_norm": 8.10655763097337, "learning_rate": 4.307692307692308e-06, "loss": 18.2554, "step": 708 }, { "epoch": 0.012959950280585665, "grad_norm": 10.10716824262294, "learning_rate": 4.313785224676314e-06, "loss": 18.3628, "step": 709 }, { "epoch": 0.012978229477032189, "grad_norm": 8.193394417599954, "learning_rate": 4.31987814166032e-06, "loss": 17.9166, "step": 710 }, { "epoch": 0.012996508673478714, "grad_norm": 9.12124387859505, "learning_rate": 4.3259710586443265e-06, "loss": 18.2743, "step": 711 }, { "epoch": 0.013014787869925238, "grad_norm": 8.275436544030585, "learning_rate": 4.332063975628332e-06, "loss": 18.3223, "step": 712 }, { "epoch": 0.013033067066371763, "grad_norm": 9.919219202939884, "learning_rate": 4.338156892612339e-06, "loss": 18.7695, "step": 713 }, { "epoch": 0.013051346262818286, "grad_norm": 9.006295734611193, "learning_rate": 4.344249809596345e-06, "loss": 18.4789, "step": 714 }, { "epoch": 0.01306962545926481, "grad_norm": 8.727624480806657, "learning_rate": 4.35034272658035e-06, "loss": 18.3371, "step": 715 }, { "epoch": 0.013087904655711335, "grad_norm": 7.154181830598685, "learning_rate": 4.356435643564357e-06, "loss": 17.5559, "step": 716 }, { "epoch": 0.013106183852157859, "grad_norm": 7.965608283923321, "learning_rate": 4.362528560548363e-06, "loss": 18.0304, "step": 717 }, { "epoch": 0.013124463048604384, "grad_norm": 7.751987635624145, "learning_rate": 4.3686214775323685e-06, "loss": 17.7836, "step": 718 }, { "epoch": 0.013142742245050907, "grad_norm": 9.822875745653093, "learning_rate": 4.374714394516375e-06, "loss": 18.4279, "step": 719 }, { "epoch": 0.013161021441497433, "grad_norm": 11.257898723251781, "learning_rate": 4.380807311500381e-06, "loss": 18.8577, "step": 720 }, { "epoch": 0.013179300637943956, "grad_norm": 7.736540051326125, "learning_rate": 4.386900228484387e-06, "loss": 17.7715, "step": 721 }, { "epoch": 0.01319757983439048, "grad_norm": 7.872605497863902, "learning_rate": 4.392993145468393e-06, "loss": 17.7712, "step": 722 }, { "epoch": 0.013215859030837005, "grad_norm": 9.113611652172722, "learning_rate": 4.3990860624524e-06, "loss": 18.3717, "step": 723 }, { "epoch": 0.013234138227283528, "grad_norm": 8.358889954921496, "learning_rate": 4.405178979436406e-06, "loss": 18.1748, "step": 724 }, { "epoch": 0.013252417423730053, "grad_norm": 7.803959399616063, "learning_rate": 4.411271896420411e-06, "loss": 17.8882, "step": 725 }, { "epoch": 0.013270696620176577, "grad_norm": 10.211516845076726, "learning_rate": 4.417364813404418e-06, "loss": 18.6295, "step": 726 }, { "epoch": 0.0132889758166231, "grad_norm": 8.15584773898762, "learning_rate": 4.423457730388424e-06, "loss": 17.8261, "step": 727 }, { "epoch": 0.013307255013069626, "grad_norm": 8.265193313412619, "learning_rate": 4.4295506473724295e-06, "loss": 18.2289, "step": 728 }, { "epoch": 0.01332553420951615, "grad_norm": 9.83364866575862, "learning_rate": 4.435643564356436e-06, "loss": 18.6857, "step": 729 }, { "epoch": 0.013343813405962674, "grad_norm": 8.859709514341905, "learning_rate": 4.441736481340443e-06, "loss": 18.426, "step": 730 }, { "epoch": 0.013362092602409198, "grad_norm": 6.84547356640548, "learning_rate": 4.447829398324448e-06, "loss": 17.7192, "step": 731 }, { "epoch": 0.013380371798855721, "grad_norm": 9.62516363738731, "learning_rate": 4.453922315308454e-06, "loss": 18.5106, "step": 732 }, { "epoch": 0.013398650995302247, "grad_norm": 9.710791197347461, "learning_rate": 4.460015232292461e-06, "loss": 18.3155, "step": 733 }, { "epoch": 0.01341693019174877, "grad_norm": 7.088619072988261, "learning_rate": 4.466108149276467e-06, "loss": 17.5585, "step": 734 }, { "epoch": 0.013435209388195295, "grad_norm": 8.813905602728473, "learning_rate": 4.472201066260472e-06, "loss": 18.3018, "step": 735 }, { "epoch": 0.013453488584641819, "grad_norm": 7.7363833694697375, "learning_rate": 4.478293983244479e-06, "loss": 17.8962, "step": 736 }, { "epoch": 0.013471767781088344, "grad_norm": 8.71227916566431, "learning_rate": 4.484386900228485e-06, "loss": 18.3363, "step": 737 }, { "epoch": 0.013490046977534868, "grad_norm": 7.840887611839472, "learning_rate": 4.4904798172124905e-06, "loss": 18.0087, "step": 738 }, { "epoch": 0.013508326173981391, "grad_norm": 8.133505317881973, "learning_rate": 4.496572734196497e-06, "loss": 18.1477, "step": 739 }, { "epoch": 0.013526605370427916, "grad_norm": 7.991598762492575, "learning_rate": 4.502665651180503e-06, "loss": 18.1431, "step": 740 }, { "epoch": 0.01354488456687444, "grad_norm": 8.610191757930943, "learning_rate": 4.5087585681645095e-06, "loss": 18.3059, "step": 741 }, { "epoch": 0.013563163763320965, "grad_norm": 7.179232227677593, "learning_rate": 4.514851485148515e-06, "loss": 17.4715, "step": 742 }, { "epoch": 0.013581442959767489, "grad_norm": 8.35948954945627, "learning_rate": 4.520944402132521e-06, "loss": 18.2916, "step": 743 }, { "epoch": 0.013599722156214012, "grad_norm": 8.46206693768903, "learning_rate": 4.5270373191165276e-06, "loss": 17.9517, "step": 744 }, { "epoch": 0.013618001352660537, "grad_norm": 10.010037950216763, "learning_rate": 4.533130236100533e-06, "loss": 19.1337, "step": 745 }, { "epoch": 0.01363628054910706, "grad_norm": 10.033585816329204, "learning_rate": 4.53922315308454e-06, "loss": 18.9602, "step": 746 }, { "epoch": 0.013654559745553586, "grad_norm": 11.821488713055107, "learning_rate": 4.545316070068546e-06, "loss": 18.67, "step": 747 }, { "epoch": 0.01367283894200011, "grad_norm": 8.7428412408898, "learning_rate": 4.5514089870525515e-06, "loss": 18.1375, "step": 748 }, { "epoch": 0.013691118138446635, "grad_norm": 8.861577418193692, "learning_rate": 4.557501904036558e-06, "loss": 18.4961, "step": 749 }, { "epoch": 0.013709397334893158, "grad_norm": 8.00181313724855, "learning_rate": 4.563594821020564e-06, "loss": 17.7919, "step": 750 }, { "epoch": 0.013727676531339682, "grad_norm": 8.107133488958844, "learning_rate": 4.56968773800457e-06, "loss": 17.9478, "step": 751 }, { "epoch": 0.013745955727786207, "grad_norm": 8.527100099140132, "learning_rate": 4.575780654988576e-06, "loss": 18.3121, "step": 752 }, { "epoch": 0.01376423492423273, "grad_norm": 9.229603260412576, "learning_rate": 4.581873571972582e-06, "loss": 18.3729, "step": 753 }, { "epoch": 0.013782514120679256, "grad_norm": 7.851968233333457, "learning_rate": 4.5879664889565885e-06, "loss": 17.7429, "step": 754 }, { "epoch": 0.01380079331712578, "grad_norm": 8.500953189255293, "learning_rate": 4.594059405940594e-06, "loss": 18.412, "step": 755 }, { "epoch": 0.013819072513572303, "grad_norm": 8.261006107947916, "learning_rate": 4.6001523229246e-06, "loss": 18.09, "step": 756 }, { "epoch": 0.013837351710018828, "grad_norm": 8.336345469458829, "learning_rate": 4.606245239908607e-06, "loss": 18.2866, "step": 757 }, { "epoch": 0.013855630906465351, "grad_norm": 9.247618791384095, "learning_rate": 4.612338156892613e-06, "loss": 18.4629, "step": 758 }, { "epoch": 0.013873910102911877, "grad_norm": 7.692097753420125, "learning_rate": 4.618431073876618e-06, "loss": 18.0023, "step": 759 }, { "epoch": 0.0138921892993584, "grad_norm": 8.185703810583892, "learning_rate": 4.624523990860625e-06, "loss": 17.9522, "step": 760 }, { "epoch": 0.013910468495804924, "grad_norm": 9.73059709231243, "learning_rate": 4.630616907844631e-06, "loss": 18.8149, "step": 761 }, { "epoch": 0.013928747692251449, "grad_norm": 8.807976975651538, "learning_rate": 4.636709824828637e-06, "loss": 18.2019, "step": 762 }, { "epoch": 0.013947026888697972, "grad_norm": 8.807756941411888, "learning_rate": 4.642802741812643e-06, "loss": 18.582, "step": 763 }, { "epoch": 0.013965306085144498, "grad_norm": 7.734928300785946, "learning_rate": 4.6488956587966495e-06, "loss": 18.0705, "step": 764 }, { "epoch": 0.013983585281591021, "grad_norm": 9.32532334645556, "learning_rate": 4.654988575780655e-06, "loss": 18.4407, "step": 765 }, { "epoch": 0.014001864478037546, "grad_norm": 7.226946936382555, "learning_rate": 4.661081492764661e-06, "loss": 17.8439, "step": 766 }, { "epoch": 0.01402014367448407, "grad_norm": 7.562857315784674, "learning_rate": 4.667174409748668e-06, "loss": 17.507, "step": 767 }, { "epoch": 0.014038422870930593, "grad_norm": 8.533365880373301, "learning_rate": 4.673267326732674e-06, "loss": 18.3735, "step": 768 }, { "epoch": 0.014056702067377119, "grad_norm": 8.474421295874395, "learning_rate": 4.67936024371668e-06, "loss": 18.3085, "step": 769 }, { "epoch": 0.014074981263823642, "grad_norm": 7.930561966593505, "learning_rate": 4.685453160700686e-06, "loss": 17.8629, "step": 770 }, { "epoch": 0.014093260460270167, "grad_norm": 8.364342894711397, "learning_rate": 4.691546077684692e-06, "loss": 17.9165, "step": 771 }, { "epoch": 0.01411153965671669, "grad_norm": 8.644803044948478, "learning_rate": 4.697638994668698e-06, "loss": 18.3113, "step": 772 }, { "epoch": 0.014129818853163214, "grad_norm": 7.0042353905597246, "learning_rate": 4.703731911652704e-06, "loss": 17.5256, "step": 773 }, { "epoch": 0.01414809804960974, "grad_norm": 7.770288505567848, "learning_rate": 4.7098248286367105e-06, "loss": 17.6203, "step": 774 }, { "epoch": 0.014166377246056263, "grad_norm": 8.114082084007649, "learning_rate": 4.715917745620716e-06, "loss": 17.8323, "step": 775 }, { "epoch": 0.014184656442502788, "grad_norm": 8.107987396579302, "learning_rate": 4.722010662604722e-06, "loss": 18.0544, "step": 776 }, { "epoch": 0.014202935638949312, "grad_norm": 9.195067350381473, "learning_rate": 4.728103579588729e-06, "loss": 18.643, "step": 777 }, { "epoch": 0.014221214835395835, "grad_norm": 8.057056508611788, "learning_rate": 4.734196496572734e-06, "loss": 17.9456, "step": 778 }, { "epoch": 0.01423949403184236, "grad_norm": 8.593365449774803, "learning_rate": 4.740289413556741e-06, "loss": 18.4926, "step": 779 }, { "epoch": 0.014257773228288884, "grad_norm": 8.77392213344187, "learning_rate": 4.746382330540747e-06, "loss": 18.4204, "step": 780 }, { "epoch": 0.01427605242473541, "grad_norm": 8.686221466198258, "learning_rate": 4.7524752475247525e-06, "loss": 17.8882, "step": 781 }, { "epoch": 0.014294331621181933, "grad_norm": 7.543089350198794, "learning_rate": 4.758568164508759e-06, "loss": 17.8673, "step": 782 }, { "epoch": 0.014312610817628458, "grad_norm": 7.72067947204325, "learning_rate": 4.764661081492765e-06, "loss": 18.07, "step": 783 }, { "epoch": 0.014330890014074981, "grad_norm": 8.14505655255484, "learning_rate": 4.770753998476771e-06, "loss": 18.2828, "step": 784 }, { "epoch": 0.014349169210521505, "grad_norm": 13.760586711568584, "learning_rate": 4.776846915460777e-06, "loss": 17.8392, "step": 785 }, { "epoch": 0.01436744840696803, "grad_norm": 8.131829236787254, "learning_rate": 4.782939832444784e-06, "loss": 18.3155, "step": 786 }, { "epoch": 0.014385727603414554, "grad_norm": 8.479480779011643, "learning_rate": 4.789032749428789e-06, "loss": 18.1185, "step": 787 }, { "epoch": 0.014404006799861079, "grad_norm": 8.001242895150341, "learning_rate": 4.795125666412795e-06, "loss": 17.9873, "step": 788 }, { "epoch": 0.014422285996307602, "grad_norm": 7.5503732470374985, "learning_rate": 4.801218583396802e-06, "loss": 17.8135, "step": 789 }, { "epoch": 0.014440565192754126, "grad_norm": 9.701365399956416, "learning_rate": 4.807311500380808e-06, "loss": 18.2403, "step": 790 }, { "epoch": 0.014458844389200651, "grad_norm": 9.540336103150405, "learning_rate": 4.8134044173648135e-06, "loss": 18.6102, "step": 791 }, { "epoch": 0.014477123585647175, "grad_norm": 9.501216323824327, "learning_rate": 4.81949733434882e-06, "loss": 18.5699, "step": 792 }, { "epoch": 0.0144954027820937, "grad_norm": 9.226566125082897, "learning_rate": 4.825590251332826e-06, "loss": 18.44, "step": 793 }, { "epoch": 0.014513681978540223, "grad_norm": 8.286721370460413, "learning_rate": 4.831683168316832e-06, "loss": 18.0559, "step": 794 }, { "epoch": 0.014531961174986747, "grad_norm": 9.034488978203306, "learning_rate": 4.837776085300838e-06, "loss": 18.4985, "step": 795 }, { "epoch": 0.014550240371433272, "grad_norm": 8.494852627339926, "learning_rate": 4.843869002284845e-06, "loss": 18.5201, "step": 796 }, { "epoch": 0.014568519567879796, "grad_norm": 7.87204466443327, "learning_rate": 4.8499619192688506e-06, "loss": 17.3842, "step": 797 }, { "epoch": 0.01458679876432632, "grad_norm": 6.817930845924937, "learning_rate": 4.856054836252856e-06, "loss": 17.6466, "step": 798 }, { "epoch": 0.014605077960772844, "grad_norm": 8.6349003420488, "learning_rate": 4.862147753236863e-06, "loss": 18.4442, "step": 799 }, { "epoch": 0.01462335715721937, "grad_norm": 9.243550819410242, "learning_rate": 4.868240670220869e-06, "loss": 18.442, "step": 800 }, { "epoch": 0.014641636353665893, "grad_norm": 9.380566812642247, "learning_rate": 4.8743335872048744e-06, "loss": 18.4791, "step": 801 }, { "epoch": 0.014659915550112416, "grad_norm": 8.10614450572759, "learning_rate": 4.880426504188881e-06, "loss": 18.5559, "step": 802 }, { "epoch": 0.014678194746558942, "grad_norm": 8.647298812820837, "learning_rate": 4.886519421172887e-06, "loss": 18.3998, "step": 803 }, { "epoch": 0.014696473943005465, "grad_norm": 7.246022705060174, "learning_rate": 4.8926123381568926e-06, "loss": 17.6121, "step": 804 }, { "epoch": 0.01471475313945199, "grad_norm": 7.971771019689157, "learning_rate": 4.898705255140899e-06, "loss": 17.9529, "step": 805 }, { "epoch": 0.014733032335898514, "grad_norm": 8.063907379592704, "learning_rate": 4.904798172124905e-06, "loss": 18.2078, "step": 806 }, { "epoch": 0.014751311532345037, "grad_norm": 9.569172498445036, "learning_rate": 4.9108910891089115e-06, "loss": 18.7412, "step": 807 }, { "epoch": 0.014769590728791563, "grad_norm": 8.647993281620685, "learning_rate": 4.916984006092917e-06, "loss": 18.1838, "step": 808 }, { "epoch": 0.014787869925238086, "grad_norm": 8.090149228825357, "learning_rate": 4.923076923076924e-06, "loss": 18.025, "step": 809 }, { "epoch": 0.014806149121684611, "grad_norm": 7.454200251754739, "learning_rate": 4.92916984006093e-06, "loss": 17.6568, "step": 810 }, { "epoch": 0.014824428318131135, "grad_norm": 7.943819236198461, "learning_rate": 4.935262757044935e-06, "loss": 17.8122, "step": 811 }, { "epoch": 0.014842707514577658, "grad_norm": 7.699759087568516, "learning_rate": 4.941355674028942e-06, "loss": 17.7524, "step": 812 }, { "epoch": 0.014860986711024184, "grad_norm": 8.368089050350493, "learning_rate": 4.947448591012948e-06, "loss": 18.1158, "step": 813 }, { "epoch": 0.014879265907470707, "grad_norm": 7.805938306042278, "learning_rate": 4.953541507996954e-06, "loss": 18.0641, "step": 814 }, { "epoch": 0.014897545103917232, "grad_norm": 7.943826091683709, "learning_rate": 4.95963442498096e-06, "loss": 17.8384, "step": 815 }, { "epoch": 0.014915824300363756, "grad_norm": 7.757634044377665, "learning_rate": 4.965727341964966e-06, "loss": 17.9461, "step": 816 }, { "epoch": 0.014934103496810281, "grad_norm": 9.9770776203452, "learning_rate": 4.9718202589489725e-06, "loss": 18.1553, "step": 817 }, { "epoch": 0.014952382693256805, "grad_norm": 8.503532888039603, "learning_rate": 4.977913175932978e-06, "loss": 17.8678, "step": 818 }, { "epoch": 0.014970661889703328, "grad_norm": 8.106962674768528, "learning_rate": 4.984006092916984e-06, "loss": 17.8451, "step": 819 }, { "epoch": 0.014988941086149853, "grad_norm": 8.833702556308314, "learning_rate": 4.990099009900991e-06, "loss": 17.9596, "step": 820 }, { "epoch": 0.015007220282596377, "grad_norm": 7.937916712032483, "learning_rate": 4.996191926884996e-06, "loss": 17.9292, "step": 821 }, { "epoch": 0.015025499479042902, "grad_norm": 7.462546508111826, "learning_rate": 5.002284843869003e-06, "loss": 17.8169, "step": 822 }, { "epoch": 0.015043778675489426, "grad_norm": 9.39886461087385, "learning_rate": 5.00837776085301e-06, "loss": 18.4869, "step": 823 }, { "epoch": 0.015062057871935949, "grad_norm": 8.050725336094882, "learning_rate": 5.0144706778370145e-06, "loss": 17.8235, "step": 824 }, { "epoch": 0.015080337068382474, "grad_norm": 9.269730982093947, "learning_rate": 5.020563594821021e-06, "loss": 18.3107, "step": 825 }, { "epoch": 0.015098616264828998, "grad_norm": 6.197477814797283, "learning_rate": 5.026656511805027e-06, "loss": 17.2913, "step": 826 }, { "epoch": 0.015116895461275523, "grad_norm": 8.072231109131868, "learning_rate": 5.0327494287890335e-06, "loss": 18.1054, "step": 827 }, { "epoch": 0.015135174657722046, "grad_norm": 7.95630608538138, "learning_rate": 5.038842345773039e-06, "loss": 18.0839, "step": 828 }, { "epoch": 0.01515345385416857, "grad_norm": 7.566277226086446, "learning_rate": 5.044935262757046e-06, "loss": 18.2472, "step": 829 }, { "epoch": 0.015171733050615095, "grad_norm": 8.616470788304431, "learning_rate": 5.051028179741051e-06, "loss": 18.4674, "step": 830 }, { "epoch": 0.015190012247061619, "grad_norm": 7.0757863279990865, "learning_rate": 5.057121096725057e-06, "loss": 17.5577, "step": 831 }, { "epoch": 0.015208291443508144, "grad_norm": 8.079457005193985, "learning_rate": 5.063214013709063e-06, "loss": 18.3606, "step": 832 }, { "epoch": 0.015226570639954667, "grad_norm": 8.778226755851602, "learning_rate": 5.06930693069307e-06, "loss": 18.2777, "step": 833 }, { "epoch": 0.015244849836401193, "grad_norm": 8.512983358399698, "learning_rate": 5.075399847677076e-06, "loss": 18.5117, "step": 834 }, { "epoch": 0.015263129032847716, "grad_norm": 8.56006329627807, "learning_rate": 5.081492764661082e-06, "loss": 18.4841, "step": 835 }, { "epoch": 0.01528140822929424, "grad_norm": 9.288836121000003, "learning_rate": 5.087585681645088e-06, "loss": 18.3186, "step": 836 }, { "epoch": 0.015299687425740765, "grad_norm": 12.407714577797238, "learning_rate": 5.093678598629094e-06, "loss": 18.7556, "step": 837 }, { "epoch": 0.015317966622187288, "grad_norm": 8.347581071473323, "learning_rate": 5.0997715156131e-06, "loss": 18.1685, "step": 838 }, { "epoch": 0.015336245818633814, "grad_norm": 8.13250343081008, "learning_rate": 5.105864432597106e-06, "loss": 18.0985, "step": 839 }, { "epoch": 0.015354525015080337, "grad_norm": 8.810476732287764, "learning_rate": 5.1119573495811126e-06, "loss": 18.333, "step": 840 }, { "epoch": 0.01537280421152686, "grad_norm": 7.078936610191827, "learning_rate": 5.118050266565119e-06, "loss": 17.4763, "step": 841 }, { "epoch": 0.015391083407973386, "grad_norm": 7.42314765775181, "learning_rate": 5.124143183549125e-06, "loss": 17.6829, "step": 842 }, { "epoch": 0.01540936260441991, "grad_norm": 7.98124248088551, "learning_rate": 5.13023610053313e-06, "loss": 17.992, "step": 843 }, { "epoch": 0.015427641800866435, "grad_norm": 9.077498128006873, "learning_rate": 5.1363290175171365e-06, "loss": 18.2794, "step": 844 }, { "epoch": 0.015445920997312958, "grad_norm": 8.072782086561258, "learning_rate": 5.142421934501143e-06, "loss": 17.945, "step": 845 }, { "epoch": 0.015464200193759482, "grad_norm": 9.451917620903867, "learning_rate": 5.148514851485149e-06, "loss": 18.9475, "step": 846 }, { "epoch": 0.015482479390206007, "grad_norm": 8.796830074489074, "learning_rate": 5.1546077684691554e-06, "loss": 18.49, "step": 847 }, { "epoch": 0.01550075858665253, "grad_norm": 8.306849648605311, "learning_rate": 5.160700685453162e-06, "loss": 18.0991, "step": 848 }, { "epoch": 0.015519037783099056, "grad_norm": 7.459114632117044, "learning_rate": 5.166793602437167e-06, "loss": 17.8777, "step": 849 }, { "epoch": 0.015537316979545579, "grad_norm": 7.5045201012713045, "learning_rate": 5.172886519421173e-06, "loss": 17.9472, "step": 850 }, { "epoch": 0.015555596175992104, "grad_norm": 8.71227984792957, "learning_rate": 5.178979436405179e-06, "loss": 18.0386, "step": 851 }, { "epoch": 0.015573875372438628, "grad_norm": 7.765463154922769, "learning_rate": 5.185072353389186e-06, "loss": 17.9142, "step": 852 }, { "epoch": 0.015592154568885151, "grad_norm": 10.161510898240463, "learning_rate": 5.191165270373192e-06, "loss": 18.6878, "step": 853 }, { "epoch": 0.015610433765331676, "grad_norm": 9.565597600633472, "learning_rate": 5.197258187357198e-06, "loss": 18.3524, "step": 854 }, { "epoch": 0.0156287129617782, "grad_norm": 8.17644137642925, "learning_rate": 5.203351104341203e-06, "loss": 17.7937, "step": 855 }, { "epoch": 0.015646992158224723, "grad_norm": 9.283708677240496, "learning_rate": 5.20944402132521e-06, "loss": 18.368, "step": 856 }, { "epoch": 0.01566527135467125, "grad_norm": 10.0686745278984, "learning_rate": 5.2155369383092155e-06, "loss": 18.7853, "step": 857 }, { "epoch": 0.015683550551117774, "grad_norm": 7.431681847854302, "learning_rate": 5.221629855293222e-06, "loss": 17.6462, "step": 858 }, { "epoch": 0.015701829747564296, "grad_norm": 8.486462263281691, "learning_rate": 5.227722772277229e-06, "loss": 18.0921, "step": 859 }, { "epoch": 0.01572010894401082, "grad_norm": 7.586138553514978, "learning_rate": 5.2338156892612345e-06, "loss": 17.8335, "step": 860 }, { "epoch": 0.015738388140457346, "grad_norm": 7.120489311236346, "learning_rate": 5.239908606245241e-06, "loss": 17.7785, "step": 861 }, { "epoch": 0.01575666733690387, "grad_norm": 7.800959298571683, "learning_rate": 5.246001523229246e-06, "loss": 17.9972, "step": 862 }, { "epoch": 0.015774946533350393, "grad_norm": 8.62951022292379, "learning_rate": 5.252094440213253e-06, "loss": 18.4499, "step": 863 }, { "epoch": 0.01579322572979692, "grad_norm": 7.9691727795812195, "learning_rate": 5.258187357197258e-06, "loss": 18.3753, "step": 864 }, { "epoch": 0.015811504926243444, "grad_norm": 8.388302723530174, "learning_rate": 5.264280274181265e-06, "loss": 18.4872, "step": 865 }, { "epoch": 0.015829784122689965, "grad_norm": 8.049158898020348, "learning_rate": 5.270373191165271e-06, "loss": 17.7163, "step": 866 }, { "epoch": 0.01584806331913649, "grad_norm": 9.6038408512579, "learning_rate": 5.276466108149277e-06, "loss": 19.0707, "step": 867 }, { "epoch": 0.015866342515583016, "grad_norm": 7.382442367842405, "learning_rate": 5.282559025133282e-06, "loss": 17.6808, "step": 868 }, { "epoch": 0.015884621712029538, "grad_norm": 8.080470041328557, "learning_rate": 5.288651942117289e-06, "loss": 18.38, "step": 869 }, { "epoch": 0.015902900908476063, "grad_norm": 9.767620188020627, "learning_rate": 5.2947448591012955e-06, "loss": 18.9061, "step": 870 }, { "epoch": 0.015921180104922588, "grad_norm": 8.31662946648782, "learning_rate": 5.300837776085301e-06, "loss": 17.8949, "step": 871 }, { "epoch": 0.015939459301369113, "grad_norm": 7.701637780454898, "learning_rate": 5.306930693069308e-06, "loss": 17.6824, "step": 872 }, { "epoch": 0.015957738497815635, "grad_norm": 8.790161934550762, "learning_rate": 5.313023610053314e-06, "loss": 18.6216, "step": 873 }, { "epoch": 0.01597601769426216, "grad_norm": 7.674755782194827, "learning_rate": 5.319116527037319e-06, "loss": 17.5485, "step": 874 }, { "epoch": 0.015994296890708685, "grad_norm": 8.867388068015622, "learning_rate": 5.325209444021325e-06, "loss": 18.4883, "step": 875 }, { "epoch": 0.016012576087155207, "grad_norm": 9.1371586404217, "learning_rate": 5.331302361005332e-06, "loss": 18.6562, "step": 876 }, { "epoch": 0.016030855283601732, "grad_norm": 8.012671192217049, "learning_rate": 5.3373952779893375e-06, "loss": 17.9416, "step": 877 }, { "epoch": 0.016049134480048258, "grad_norm": 9.978771087488296, "learning_rate": 5.343488194973344e-06, "loss": 18.8468, "step": 878 }, { "epoch": 0.016067413676494783, "grad_norm": 9.18322782849521, "learning_rate": 5.349581111957351e-06, "loss": 18.4841, "step": 879 }, { "epoch": 0.016085692872941305, "grad_norm": 10.14432801291603, "learning_rate": 5.355674028941356e-06, "loss": 18.7332, "step": 880 }, { "epoch": 0.01610397206938783, "grad_norm": 7.964452973970781, "learning_rate": 5.361766945925362e-06, "loss": 18.2144, "step": 881 }, { "epoch": 0.016122251265834355, "grad_norm": 7.916189059576979, "learning_rate": 5.367859862909368e-06, "loss": 17.9018, "step": 882 }, { "epoch": 0.016140530462280877, "grad_norm": 8.399208049518332, "learning_rate": 5.373952779893375e-06, "loss": 18.0163, "step": 883 }, { "epoch": 0.016158809658727402, "grad_norm": 9.116458026740009, "learning_rate": 5.38004569687738e-06, "loss": 18.4248, "step": 884 }, { "epoch": 0.016177088855173927, "grad_norm": 8.630937961007142, "learning_rate": 5.386138613861387e-06, "loss": 18.3698, "step": 885 }, { "epoch": 0.01619536805162045, "grad_norm": 9.413585620699246, "learning_rate": 5.3922315308453936e-06, "loss": 18.6116, "step": 886 }, { "epoch": 0.016213647248066974, "grad_norm": 7.6368778975300184, "learning_rate": 5.3983244478293985e-06, "loss": 17.8458, "step": 887 }, { "epoch": 0.0162319264445135, "grad_norm": 7.403055825794283, "learning_rate": 5.404417364813404e-06, "loss": 17.747, "step": 888 }, { "epoch": 0.016250205640960025, "grad_norm": 7.766207662372749, "learning_rate": 5.410510281797411e-06, "loss": 18.1277, "step": 889 }, { "epoch": 0.016268484837406547, "grad_norm": 8.930033722117813, "learning_rate": 5.4166031987814174e-06, "loss": 18.3946, "step": 890 }, { "epoch": 0.016286764033853072, "grad_norm": 8.124914772358064, "learning_rate": 5.422696115765423e-06, "loss": 18.2002, "step": 891 }, { "epoch": 0.016305043230299597, "grad_norm": 9.296893407946603, "learning_rate": 5.42878903274943e-06, "loss": 18.4557, "step": 892 }, { "epoch": 0.01632332242674612, "grad_norm": 8.282641846666893, "learning_rate": 5.434881949733435e-06, "loss": 18.3414, "step": 893 }, { "epoch": 0.016341601623192644, "grad_norm": 7.70249475337819, "learning_rate": 5.440974866717441e-06, "loss": 18.0186, "step": 894 }, { "epoch": 0.01635988081963917, "grad_norm": 8.136410031229282, "learning_rate": 5.447067783701447e-06, "loss": 18.4672, "step": 895 }, { "epoch": 0.016378160016085695, "grad_norm": 9.9700678925168, "learning_rate": 5.453160700685454e-06, "loss": 18.9682, "step": 896 }, { "epoch": 0.016396439212532216, "grad_norm": 7.017176755988746, "learning_rate": 5.45925361766946e-06, "loss": 17.8646, "step": 897 }, { "epoch": 0.01641471840897874, "grad_norm": 9.103585413979374, "learning_rate": 5.465346534653466e-06, "loss": 18.3866, "step": 898 }, { "epoch": 0.016432997605425267, "grad_norm": 8.404999518682738, "learning_rate": 5.471439451637471e-06, "loss": 18.418, "step": 899 }, { "epoch": 0.01645127680187179, "grad_norm": 7.937828994479255, "learning_rate": 5.4775323686214776e-06, "loss": 18.3949, "step": 900 }, { "epoch": 0.016469555998318314, "grad_norm": 8.51573068887959, "learning_rate": 5.483625285605484e-06, "loss": 18.1735, "step": 901 }, { "epoch": 0.01648783519476484, "grad_norm": 8.529255174675132, "learning_rate": 5.48971820258949e-06, "loss": 18.2995, "step": 902 }, { "epoch": 0.01650611439121136, "grad_norm": 7.766746131636291, "learning_rate": 5.4958111195734965e-06, "loss": 17.3718, "step": 903 }, { "epoch": 0.016524393587657886, "grad_norm": 9.746610904544609, "learning_rate": 5.501904036557502e-06, "loss": 18.1831, "step": 904 }, { "epoch": 0.01654267278410441, "grad_norm": 8.476263194389706, "learning_rate": 5.507996953541509e-06, "loss": 17.8804, "step": 905 }, { "epoch": 0.016560951980550936, "grad_norm": 7.748230606111784, "learning_rate": 5.514089870525514e-06, "loss": 17.8924, "step": 906 }, { "epoch": 0.016579231176997458, "grad_norm": 7.432102607667173, "learning_rate": 5.52018278750952e-06, "loss": 18.1648, "step": 907 }, { "epoch": 0.016597510373443983, "grad_norm": 7.84694618480964, "learning_rate": 5.526275704493527e-06, "loss": 17.9913, "step": 908 }, { "epoch": 0.01661578956989051, "grad_norm": 7.404929998940032, "learning_rate": 5.532368621477533e-06, "loss": 17.8132, "step": 909 }, { "epoch": 0.01663406876633703, "grad_norm": 9.29751618630069, "learning_rate": 5.538461538461539e-06, "loss": 18.5345, "step": 910 }, { "epoch": 0.016652347962783556, "grad_norm": 7.417833178895084, "learning_rate": 5.544554455445545e-06, "loss": 17.6646, "step": 911 }, { "epoch": 0.01667062715923008, "grad_norm": 9.70772100620693, "learning_rate": 5.550647372429551e-06, "loss": 18.7744, "step": 912 }, { "epoch": 0.016688906355676606, "grad_norm": 9.072232524086857, "learning_rate": 5.556740289413557e-06, "loss": 18.3423, "step": 913 }, { "epoch": 0.016707185552123128, "grad_norm": 9.07910036742243, "learning_rate": 5.562833206397563e-06, "loss": 18.2481, "step": 914 }, { "epoch": 0.016725464748569653, "grad_norm": 9.809361373009457, "learning_rate": 5.56892612338157e-06, "loss": 18.367, "step": 915 }, { "epoch": 0.01674374394501618, "grad_norm": 10.115596121390496, "learning_rate": 5.575019040365576e-06, "loss": 18.3545, "step": 916 }, { "epoch": 0.0167620231414627, "grad_norm": 6.7708275169011305, "learning_rate": 5.581111957349582e-06, "loss": 17.5047, "step": 917 }, { "epoch": 0.016780302337909225, "grad_norm": 8.138730437640659, "learning_rate": 5.587204874333587e-06, "loss": 18.3553, "step": 918 }, { "epoch": 0.01679858153435575, "grad_norm": 9.609285718167515, "learning_rate": 5.593297791317594e-06, "loss": 18.3202, "step": 919 }, { "epoch": 0.016816860730802272, "grad_norm": 8.384124976958663, "learning_rate": 5.5993907083015995e-06, "loss": 18.2082, "step": 920 }, { "epoch": 0.016835139927248798, "grad_norm": 9.14439730562192, "learning_rate": 5.605483625285606e-06, "loss": 17.9248, "step": 921 }, { "epoch": 0.016853419123695323, "grad_norm": 8.996609322297227, "learning_rate": 5.611576542269612e-06, "loss": 18.3729, "step": 922 }, { "epoch": 0.016871698320141848, "grad_norm": 7.315932421270537, "learning_rate": 5.6176694592536185e-06, "loss": 17.8854, "step": 923 }, { "epoch": 0.01688997751658837, "grad_norm": 8.122807052797084, "learning_rate": 5.623762376237625e-06, "loss": 18.3153, "step": 924 }, { "epoch": 0.016908256713034895, "grad_norm": 7.902339895701034, "learning_rate": 5.62985529322163e-06, "loss": 18.2543, "step": 925 }, { "epoch": 0.01692653590948142, "grad_norm": 9.590556579018623, "learning_rate": 5.635948210205637e-06, "loss": 19.1718, "step": 926 }, { "epoch": 0.016944815105927942, "grad_norm": 8.795053677679899, "learning_rate": 5.642041127189642e-06, "loss": 18.236, "step": 927 }, { "epoch": 0.016963094302374467, "grad_norm": 8.946710023619962, "learning_rate": 5.648134044173649e-06, "loss": 18.3254, "step": 928 }, { "epoch": 0.016981373498820992, "grad_norm": 7.717632589221404, "learning_rate": 5.654226961157655e-06, "loss": 17.883, "step": 929 }, { "epoch": 0.016999652695267518, "grad_norm": 8.818060009422503, "learning_rate": 5.660319878141661e-06, "loss": 18.7201, "step": 930 }, { "epoch": 0.01701793189171404, "grad_norm": 7.428304896972738, "learning_rate": 5.666412795125666e-06, "loss": 17.6157, "step": 931 }, { "epoch": 0.017036211088160565, "grad_norm": 8.63741944219696, "learning_rate": 5.672505712109673e-06, "loss": 18.2596, "step": 932 }, { "epoch": 0.01705449028460709, "grad_norm": 8.776657541381901, "learning_rate": 5.678598629093679e-06, "loss": 18.8281, "step": 933 }, { "epoch": 0.01707276948105361, "grad_norm": 8.657479987630891, "learning_rate": 5.684691546077685e-06, "loss": 17.9952, "step": 934 }, { "epoch": 0.017091048677500137, "grad_norm": 8.165623263296865, "learning_rate": 5.690784463061692e-06, "loss": 18.0903, "step": 935 }, { "epoch": 0.017109327873946662, "grad_norm": 7.746609654316087, "learning_rate": 5.6968773800456976e-06, "loss": 17.9119, "step": 936 }, { "epoch": 0.017127607070393184, "grad_norm": 6.5051760343841245, "learning_rate": 5.702970297029703e-06, "loss": 17.2384, "step": 937 }, { "epoch": 0.01714588626683971, "grad_norm": 10.149156592353432, "learning_rate": 5.709063214013709e-06, "loss": 18.8406, "step": 938 }, { "epoch": 0.017164165463286234, "grad_norm": 7.539662391577778, "learning_rate": 5.715156130997716e-06, "loss": 17.7378, "step": 939 }, { "epoch": 0.01718244465973276, "grad_norm": 7.8752178884870565, "learning_rate": 5.7212490479817215e-06, "loss": 17.9224, "step": 940 }, { "epoch": 0.01720072385617928, "grad_norm": 8.16065607456676, "learning_rate": 5.727341964965728e-06, "loss": 18.0614, "step": 941 }, { "epoch": 0.017219003052625807, "grad_norm": 9.572267813617717, "learning_rate": 5.733434881949735e-06, "loss": 18.6002, "step": 942 }, { "epoch": 0.017237282249072332, "grad_norm": 8.646762787246, "learning_rate": 5.73952779893374e-06, "loss": 17.9923, "step": 943 }, { "epoch": 0.017255561445518854, "grad_norm": 7.53253733262006, "learning_rate": 5.745620715917745e-06, "loss": 17.9094, "step": 944 }, { "epoch": 0.01727384064196538, "grad_norm": 7.52707356259082, "learning_rate": 5.751713632901752e-06, "loss": 17.6118, "step": 945 }, { "epoch": 0.017292119838411904, "grad_norm": 8.480907437056311, "learning_rate": 5.7578065498857585e-06, "loss": 17.9889, "step": 946 }, { "epoch": 0.01731039903485843, "grad_norm": 7.035617828510175, "learning_rate": 5.763899466869764e-06, "loss": 17.6165, "step": 947 }, { "epoch": 0.01732867823130495, "grad_norm": 8.0693542132295, "learning_rate": 5.769992383853771e-06, "loss": 18.1385, "step": 948 }, { "epoch": 0.017346957427751476, "grad_norm": 9.662756503100724, "learning_rate": 5.776085300837777e-06, "loss": 19.1192, "step": 949 }, { "epoch": 0.017365236624198, "grad_norm": 7.757126477800354, "learning_rate": 5.7821782178217824e-06, "loss": 17.9017, "step": 950 }, { "epoch": 0.017383515820644523, "grad_norm": 9.677005352029196, "learning_rate": 5.788271134805788e-06, "loss": 18.4819, "step": 951 }, { "epoch": 0.01740179501709105, "grad_norm": 7.30926952903755, "learning_rate": 5.794364051789795e-06, "loss": 17.8772, "step": 952 }, { "epoch": 0.017420074213537574, "grad_norm": 8.325157678978579, "learning_rate": 5.800456968773801e-06, "loss": 18.1306, "step": 953 }, { "epoch": 0.017438353409984095, "grad_norm": 7.731164288521678, "learning_rate": 5.806549885757807e-06, "loss": 17.9777, "step": 954 }, { "epoch": 0.01745663260643062, "grad_norm": 7.6792569282850485, "learning_rate": 5.812642802741814e-06, "loss": 18.0973, "step": 955 }, { "epoch": 0.017474911802877146, "grad_norm": 8.399265872828295, "learning_rate": 5.818735719725819e-06, "loss": 18.2354, "step": 956 }, { "epoch": 0.01749319099932367, "grad_norm": 8.10916575336011, "learning_rate": 5.824828636709825e-06, "loss": 17.878, "step": 957 }, { "epoch": 0.017511470195770193, "grad_norm": 7.027505730094863, "learning_rate": 5.830921553693831e-06, "loss": 17.8894, "step": 958 }, { "epoch": 0.017529749392216718, "grad_norm": 7.652531931486096, "learning_rate": 5.837014470677838e-06, "loss": 18.224, "step": 959 }, { "epoch": 0.017548028588663243, "grad_norm": 10.327063870382897, "learning_rate": 5.843107387661843e-06, "loss": 17.9355, "step": 960 }, { "epoch": 0.017566307785109765, "grad_norm": 8.613566541365316, "learning_rate": 5.84920030464585e-06, "loss": 18.0343, "step": 961 }, { "epoch": 0.01758458698155629, "grad_norm": 8.57057882780188, "learning_rate": 5.855293221629855e-06, "loss": 18.196, "step": 962 }, { "epoch": 0.017602866178002816, "grad_norm": 7.938972752095507, "learning_rate": 5.8613861386138615e-06, "loss": 17.6136, "step": 963 }, { "epoch": 0.01762114537444934, "grad_norm": 7.478286976202789, "learning_rate": 5.867479055597868e-06, "loss": 17.9303, "step": 964 }, { "epoch": 0.017639424570895863, "grad_norm": 8.883723962903053, "learning_rate": 5.873571972581874e-06, "loss": 18.2692, "step": 965 }, { "epoch": 0.017657703767342388, "grad_norm": 8.651125461568094, "learning_rate": 5.8796648895658805e-06, "loss": 18.1206, "step": 966 }, { "epoch": 0.017675982963788913, "grad_norm": 11.154753319717942, "learning_rate": 5.885757806549886e-06, "loss": 19.2872, "step": 967 }, { "epoch": 0.017694262160235435, "grad_norm": 10.085538888665493, "learning_rate": 5.891850723533893e-06, "loss": 18.393, "step": 968 }, { "epoch": 0.01771254135668196, "grad_norm": 7.843160528454247, "learning_rate": 5.897943640517898e-06, "loss": 18.0221, "step": 969 }, { "epoch": 0.017730820553128485, "grad_norm": 7.979372852978716, "learning_rate": 5.904036557501904e-06, "loss": 17.9246, "step": 970 }, { "epoch": 0.017749099749575007, "grad_norm": 9.453295281409622, "learning_rate": 5.910129474485911e-06, "loss": 18.7617, "step": 971 }, { "epoch": 0.017767378946021532, "grad_norm": 8.043964875887363, "learning_rate": 5.916222391469917e-06, "loss": 17.8481, "step": 972 }, { "epoch": 0.017785658142468058, "grad_norm": 8.844986788228319, "learning_rate": 5.922315308453923e-06, "loss": 18.457, "step": 973 }, { "epoch": 0.017803937338914583, "grad_norm": 9.057668393721022, "learning_rate": 5.928408225437929e-06, "loss": 18.1555, "step": 974 }, { "epoch": 0.017822216535361105, "grad_norm": 8.104869602718992, "learning_rate": 5.934501142421935e-06, "loss": 17.8352, "step": 975 }, { "epoch": 0.01784049573180763, "grad_norm": 7.66250929390229, "learning_rate": 5.940594059405941e-06, "loss": 17.9034, "step": 976 }, { "epoch": 0.017858774928254155, "grad_norm": 9.66899303971445, "learning_rate": 5.946686976389947e-06, "loss": 18.6847, "step": 977 }, { "epoch": 0.017877054124700677, "grad_norm": 8.201507650990836, "learning_rate": 5.952779893373953e-06, "loss": 18.3439, "step": 978 }, { "epoch": 0.017895333321147202, "grad_norm": 7.123313056914936, "learning_rate": 5.95887281035796e-06, "loss": 17.396, "step": 979 }, { "epoch": 0.017913612517593727, "grad_norm": 8.536481554834298, "learning_rate": 5.964965727341966e-06, "loss": 18.5038, "step": 980 }, { "epoch": 0.017931891714040252, "grad_norm": 6.346971469399508, "learning_rate": 5.971058644325971e-06, "loss": 17.1418, "step": 981 }, { "epoch": 0.017950170910486774, "grad_norm": 7.619923063560709, "learning_rate": 5.977151561309978e-06, "loss": 17.7694, "step": 982 }, { "epoch": 0.0179684501069333, "grad_norm": 7.432489736143195, "learning_rate": 5.9832444782939835e-06, "loss": 17.6501, "step": 983 }, { "epoch": 0.017986729303379825, "grad_norm": 8.525453283530835, "learning_rate": 5.98933739527799e-06, "loss": 18.0, "step": 984 }, { "epoch": 0.018005008499826346, "grad_norm": 8.319849636997306, "learning_rate": 5.995430312261996e-06, "loss": 17.8633, "step": 985 }, { "epoch": 0.01802328769627287, "grad_norm": 9.110643654811224, "learning_rate": 6.0015232292460024e-06, "loss": 18.3716, "step": 986 }, { "epoch": 0.018041566892719397, "grad_norm": 7.473648054911621, "learning_rate": 6.007616146230009e-06, "loss": 17.6931, "step": 987 }, { "epoch": 0.01805984608916592, "grad_norm": 8.189401317764087, "learning_rate": 6.013709063214014e-06, "loss": 17.9509, "step": 988 }, { "epoch": 0.018078125285612444, "grad_norm": 8.063947143779858, "learning_rate": 6.01980198019802e-06, "loss": 18.3433, "step": 989 }, { "epoch": 0.01809640448205897, "grad_norm": 7.397779832323864, "learning_rate": 6.025894897182026e-06, "loss": 17.6071, "step": 990 }, { "epoch": 0.018114683678505494, "grad_norm": 7.525569710600015, "learning_rate": 6.031987814166033e-06, "loss": 17.718, "step": 991 }, { "epoch": 0.018132962874952016, "grad_norm": 8.473776849496664, "learning_rate": 6.038080731150039e-06, "loss": 18.1782, "step": 992 }, { "epoch": 0.01815124207139854, "grad_norm": 8.206811645257254, "learning_rate": 6.044173648134045e-06, "loss": 17.9923, "step": 993 }, { "epoch": 0.018169521267845067, "grad_norm": 7.4978382746034296, "learning_rate": 6.05026656511805e-06, "loss": 18.3228, "step": 994 }, { "epoch": 0.01818780046429159, "grad_norm": 8.273357499146584, "learning_rate": 6.056359482102057e-06, "loss": 18.4123, "step": 995 }, { "epoch": 0.018206079660738114, "grad_norm": 6.892464190887397, "learning_rate": 6.0624523990860626e-06, "loss": 17.5935, "step": 996 }, { "epoch": 0.01822435885718464, "grad_norm": 7.848376155985841, "learning_rate": 6.068545316070069e-06, "loss": 17.9181, "step": 997 }, { "epoch": 0.018242638053631164, "grad_norm": 8.05134430330745, "learning_rate": 6.074638233054076e-06, "loss": 18.1096, "step": 998 }, { "epoch": 0.018260917250077686, "grad_norm": 8.255534490186607, "learning_rate": 6.0807311500380815e-06, "loss": 17.9048, "step": 999 }, { "epoch": 0.01827919644652421, "grad_norm": 7.252054367954556, "learning_rate": 6.0868240670220864e-06, "loss": 17.9062, "step": 1000 }, { "epoch": 0.018297475642970736, "grad_norm": 9.036905165252518, "learning_rate": 6.092916984006093e-06, "loss": 18.292, "step": 1001 }, { "epoch": 0.018315754839417258, "grad_norm": 7.928256324043463, "learning_rate": 6.0990099009901e-06, "loss": 17.8227, "step": 1002 }, { "epoch": 0.018334034035863783, "grad_norm": 9.459523185652099, "learning_rate": 6.105102817974105e-06, "loss": 18.4981, "step": 1003 }, { "epoch": 0.01835231323231031, "grad_norm": 9.253943259491349, "learning_rate": 6.111195734958112e-06, "loss": 18.2896, "step": 1004 }, { "epoch": 0.01837059242875683, "grad_norm": 7.049122820792015, "learning_rate": 6.117288651942118e-06, "loss": 17.567, "step": 1005 }, { "epoch": 0.018388871625203355, "grad_norm": 7.454126276420805, "learning_rate": 6.1233815689261235e-06, "loss": 17.6288, "step": 1006 }, { "epoch": 0.01840715082164988, "grad_norm": 7.554500865570359, "learning_rate": 6.129474485910129e-06, "loss": 17.5567, "step": 1007 }, { "epoch": 0.018425430018096406, "grad_norm": 7.5218192319804595, "learning_rate": 6.135567402894136e-06, "loss": 17.662, "step": 1008 }, { "epoch": 0.018443709214542928, "grad_norm": 8.684509200583053, "learning_rate": 6.1416603198781425e-06, "loss": 18.411, "step": 1009 }, { "epoch": 0.018461988410989453, "grad_norm": 8.410385879818696, "learning_rate": 6.147753236862148e-06, "loss": 17.9104, "step": 1010 }, { "epoch": 0.018480267607435978, "grad_norm": 7.885404566415697, "learning_rate": 6.153846153846155e-06, "loss": 18.0883, "step": 1011 }, { "epoch": 0.0184985468038825, "grad_norm": 8.439882645200115, "learning_rate": 6.159939070830161e-06, "loss": 18.3346, "step": 1012 }, { "epoch": 0.018516826000329025, "grad_norm": 8.40871647134056, "learning_rate": 6.166031987814166e-06, "loss": 18.1789, "step": 1013 }, { "epoch": 0.01853510519677555, "grad_norm": 8.60571070495959, "learning_rate": 6.172124904798172e-06, "loss": 18.5632, "step": 1014 }, { "epoch": 0.018553384393222076, "grad_norm": 8.243216980529974, "learning_rate": 6.178217821782179e-06, "loss": 18.0231, "step": 1015 }, { "epoch": 0.018571663589668597, "grad_norm": 7.86378260881206, "learning_rate": 6.1843107387661845e-06, "loss": 17.7789, "step": 1016 }, { "epoch": 0.018589942786115123, "grad_norm": 8.820936931499855, "learning_rate": 6.190403655750191e-06, "loss": 18.6289, "step": 1017 }, { "epoch": 0.018608221982561648, "grad_norm": 8.782521896358636, "learning_rate": 6.196496572734198e-06, "loss": 18.4418, "step": 1018 }, { "epoch": 0.01862650117900817, "grad_norm": 6.899410432972345, "learning_rate": 6.202589489718203e-06, "loss": 17.4621, "step": 1019 }, { "epoch": 0.018644780375454695, "grad_norm": 8.786703836897136, "learning_rate": 6.208682406702209e-06, "loss": 18.611, "step": 1020 }, { "epoch": 0.01866305957190122, "grad_norm": 7.575104462004002, "learning_rate": 6.214775323686215e-06, "loss": 17.7327, "step": 1021 }, { "epoch": 0.018681338768347742, "grad_norm": 7.787769432834653, "learning_rate": 6.220868240670222e-06, "loss": 18.0189, "step": 1022 }, { "epoch": 0.018699617964794267, "grad_norm": 8.26130050674653, "learning_rate": 6.226961157654227e-06, "loss": 18.0139, "step": 1023 }, { "epoch": 0.018717897161240792, "grad_norm": 7.317306905270767, "learning_rate": 6.233054074638234e-06, "loss": 17.6614, "step": 1024 }, { "epoch": 0.018736176357687317, "grad_norm": 7.253990770261824, "learning_rate": 6.239146991622239e-06, "loss": 17.7952, "step": 1025 }, { "epoch": 0.01875445555413384, "grad_norm": 7.277989169206523, "learning_rate": 6.2452399086062455e-06, "loss": 17.9357, "step": 1026 }, { "epoch": 0.018772734750580364, "grad_norm": 8.994043472015644, "learning_rate": 6.251332825590252e-06, "loss": 18.5219, "step": 1027 }, { "epoch": 0.01879101394702689, "grad_norm": 10.04532201853213, "learning_rate": 6.257425742574258e-06, "loss": 18.5184, "step": 1028 }, { "epoch": 0.01880929314347341, "grad_norm": 8.069337617902569, "learning_rate": 6.2635186595582645e-06, "loss": 18.1767, "step": 1029 }, { "epoch": 0.018827572339919937, "grad_norm": 8.78105839296198, "learning_rate": 6.26961157654227e-06, "loss": 18.4066, "step": 1030 }, { "epoch": 0.018845851536366462, "grad_norm": 7.886476272784995, "learning_rate": 6.275704493526277e-06, "loss": 17.7988, "step": 1031 }, { "epoch": 0.018864130732812987, "grad_norm": 7.714648845995187, "learning_rate": 6.281797410510282e-06, "loss": 17.9388, "step": 1032 }, { "epoch": 0.01888240992925951, "grad_norm": 9.323420103128699, "learning_rate": 6.287890327494288e-06, "loss": 18.2556, "step": 1033 }, { "epoch": 0.018900689125706034, "grad_norm": 8.036862821596728, "learning_rate": 6.293983244478294e-06, "loss": 17.8791, "step": 1034 }, { "epoch": 0.01891896832215256, "grad_norm": 7.757507873745402, "learning_rate": 6.300076161462301e-06, "loss": 17.9595, "step": 1035 }, { "epoch": 0.01893724751859908, "grad_norm": 7.829337929170519, "learning_rate": 6.306169078446307e-06, "loss": 17.8935, "step": 1036 }, { "epoch": 0.018955526715045606, "grad_norm": 7.992430925493884, "learning_rate": 6.312261995430313e-06, "loss": 17.9734, "step": 1037 }, { "epoch": 0.01897380591149213, "grad_norm": 7.085494698683286, "learning_rate": 6.318354912414319e-06, "loss": 17.7806, "step": 1038 }, { "epoch": 0.018992085107938653, "grad_norm": 7.997564969386241, "learning_rate": 6.324447829398325e-06, "loss": 18.1079, "step": 1039 }, { "epoch": 0.01901036430438518, "grad_norm": 7.5883916646067755, "learning_rate": 6.330540746382331e-06, "loss": 17.9337, "step": 1040 }, { "epoch": 0.019028643500831704, "grad_norm": 7.976100637615995, "learning_rate": 6.336633663366337e-06, "loss": 17.9246, "step": 1041 }, { "epoch": 0.01904692269727823, "grad_norm": 8.071392426167597, "learning_rate": 6.3427265803503435e-06, "loss": 18.0945, "step": 1042 }, { "epoch": 0.01906520189372475, "grad_norm": 9.530560059122761, "learning_rate": 6.34881949733435e-06, "loss": 18.1668, "step": 1043 }, { "epoch": 0.019083481090171276, "grad_norm": 8.56340449411634, "learning_rate": 6.354912414318355e-06, "loss": 18.4313, "step": 1044 }, { "epoch": 0.0191017602866178, "grad_norm": 8.121857464759803, "learning_rate": 6.361005331302361e-06, "loss": 17.8877, "step": 1045 }, { "epoch": 0.019120039483064323, "grad_norm": 8.417793702742772, "learning_rate": 6.3670982482863674e-06, "loss": 18.3695, "step": 1046 }, { "epoch": 0.01913831867951085, "grad_norm": 7.6983153438617835, "learning_rate": 6.373191165270374e-06, "loss": 18.035, "step": 1047 }, { "epoch": 0.019156597875957374, "grad_norm": 8.893385558769452, "learning_rate": 6.37928408225438e-06, "loss": 18.3983, "step": 1048 }, { "epoch": 0.0191748770724039, "grad_norm": 10.604928656082148, "learning_rate": 6.385376999238386e-06, "loss": 19.0163, "step": 1049 }, { "epoch": 0.01919315626885042, "grad_norm": 8.015052112864332, "learning_rate": 6.391469916222392e-06, "loss": 18.0764, "step": 1050 }, { "epoch": 0.019211435465296946, "grad_norm": 7.794808472928623, "learning_rate": 6.397562833206398e-06, "loss": 18.0088, "step": 1051 }, { "epoch": 0.01922971466174347, "grad_norm": 9.257359827663915, "learning_rate": 6.403655750190404e-06, "loss": 18.2095, "step": 1052 }, { "epoch": 0.019247993858189993, "grad_norm": 9.54466142041045, "learning_rate": 6.40974866717441e-06, "loss": 18.9175, "step": 1053 }, { "epoch": 0.019266273054636518, "grad_norm": 8.425516813446993, "learning_rate": 6.415841584158417e-06, "loss": 18.5695, "step": 1054 }, { "epoch": 0.019284552251083043, "grad_norm": 8.821858127632892, "learning_rate": 6.421934501142423e-06, "loss": 18.0299, "step": 1055 }, { "epoch": 0.019302831447529565, "grad_norm": 8.460954236609926, "learning_rate": 6.428027418126429e-06, "loss": 18.195, "step": 1056 }, { "epoch": 0.01932111064397609, "grad_norm": 7.721241701220841, "learning_rate": 6.434120335110434e-06, "loss": 17.8886, "step": 1057 }, { "epoch": 0.019339389840422615, "grad_norm": 8.513571952984433, "learning_rate": 6.440213252094441e-06, "loss": 18.3746, "step": 1058 }, { "epoch": 0.01935766903686914, "grad_norm": 7.964369243429046, "learning_rate": 6.4463061690784465e-06, "loss": 18.0392, "step": 1059 }, { "epoch": 0.019375948233315662, "grad_norm": 8.517667077055881, "learning_rate": 6.452399086062453e-06, "loss": 18.6715, "step": 1060 }, { "epoch": 0.019394227429762188, "grad_norm": 8.02065554970321, "learning_rate": 6.458492003046459e-06, "loss": 17.7864, "step": 1061 }, { "epoch": 0.019412506626208713, "grad_norm": 8.48795408189634, "learning_rate": 6.4645849200304655e-06, "loss": 18.2628, "step": 1062 }, { "epoch": 0.019430785822655235, "grad_norm": 8.544082265276332, "learning_rate": 6.47067783701447e-06, "loss": 18.0344, "step": 1063 }, { "epoch": 0.01944906501910176, "grad_norm": 9.285600661351275, "learning_rate": 6.476770753998477e-06, "loss": 18.3072, "step": 1064 }, { "epoch": 0.019467344215548285, "grad_norm": 10.555387468574859, "learning_rate": 6.482863670982484e-06, "loss": 18.9304, "step": 1065 }, { "epoch": 0.01948562341199481, "grad_norm": 7.76696304450934, "learning_rate": 6.488956587966489e-06, "loss": 18.0832, "step": 1066 }, { "epoch": 0.019503902608441332, "grad_norm": 7.212742111061074, "learning_rate": 6.495049504950496e-06, "loss": 17.7941, "step": 1067 }, { "epoch": 0.019522181804887857, "grad_norm": 7.273730185764739, "learning_rate": 6.501142421934502e-06, "loss": 17.5107, "step": 1068 }, { "epoch": 0.019540461001334383, "grad_norm": 8.713701016851418, "learning_rate": 6.5072353389185075e-06, "loss": 18.1767, "step": 1069 }, { "epoch": 0.019558740197780904, "grad_norm": 7.520575463727811, "learning_rate": 6.513328255902513e-06, "loss": 17.9406, "step": 1070 }, { "epoch": 0.01957701939422743, "grad_norm": 7.842937496796728, "learning_rate": 6.51942117288652e-06, "loss": 18.0233, "step": 1071 }, { "epoch": 0.019595298590673955, "grad_norm": 8.061706991842145, "learning_rate": 6.525514089870526e-06, "loss": 17.7602, "step": 1072 }, { "epoch": 0.019613577787120477, "grad_norm": 7.6223938861675835, "learning_rate": 6.531607006854532e-06, "loss": 18.1191, "step": 1073 }, { "epoch": 0.019631856983567002, "grad_norm": 8.279127166238007, "learning_rate": 6.537699923838539e-06, "loss": 18.3115, "step": 1074 }, { "epoch": 0.019650136180013527, "grad_norm": 7.188251509364014, "learning_rate": 6.543792840822545e-06, "loss": 17.3865, "step": 1075 }, { "epoch": 0.019668415376460052, "grad_norm": 7.216757458949848, "learning_rate": 6.54988575780655e-06, "loss": 17.6607, "step": 1076 }, { "epoch": 0.019686694572906574, "grad_norm": 8.670515815497264, "learning_rate": 6.555978674790556e-06, "loss": 18.1868, "step": 1077 }, { "epoch": 0.0197049737693531, "grad_norm": 8.110697301247246, "learning_rate": 6.562071591774563e-06, "loss": 18.0288, "step": 1078 }, { "epoch": 0.019723252965799624, "grad_norm": 8.809505809170233, "learning_rate": 6.5681645087585685e-06, "loss": 18.0671, "step": 1079 }, { "epoch": 0.019741532162246146, "grad_norm": 9.605865738512373, "learning_rate": 6.574257425742575e-06, "loss": 18.4982, "step": 1080 }, { "epoch": 0.01975981135869267, "grad_norm": 7.916135171417083, "learning_rate": 6.580350342726582e-06, "loss": 18.1144, "step": 1081 }, { "epoch": 0.019778090555139197, "grad_norm": 7.217096870695601, "learning_rate": 6.586443259710587e-06, "loss": 18.0143, "step": 1082 }, { "epoch": 0.019796369751585722, "grad_norm": 7.679991772771017, "learning_rate": 6.592536176694593e-06, "loss": 18.0063, "step": 1083 }, { "epoch": 0.019814648948032244, "grad_norm": 8.20226140412346, "learning_rate": 6.598629093678599e-06, "loss": 18.4665, "step": 1084 }, { "epoch": 0.01983292814447877, "grad_norm": 9.078351119787966, "learning_rate": 6.6047220106626056e-06, "loss": 18.7463, "step": 1085 }, { "epoch": 0.019851207340925294, "grad_norm": 7.516852386672339, "learning_rate": 6.610814927646611e-06, "loss": 17.775, "step": 1086 }, { "epoch": 0.019869486537371816, "grad_norm": 8.181787029821258, "learning_rate": 6.616907844630618e-06, "loss": 18.472, "step": 1087 }, { "epoch": 0.01988776573381834, "grad_norm": 6.560494302351963, "learning_rate": 6.623000761614623e-06, "loss": 17.5194, "step": 1088 }, { "epoch": 0.019906044930264866, "grad_norm": 7.901692922390689, "learning_rate": 6.6290936785986294e-06, "loss": 18.0642, "step": 1089 }, { "epoch": 0.019924324126711388, "grad_norm": 8.061066613367577, "learning_rate": 6.635186595582635e-06, "loss": 18.0876, "step": 1090 }, { "epoch": 0.019942603323157913, "grad_norm": 9.454097103350708, "learning_rate": 6.641279512566642e-06, "loss": 18.8881, "step": 1091 }, { "epoch": 0.01996088251960444, "grad_norm": 8.756284787713158, "learning_rate": 6.647372429550648e-06, "loss": 18.2767, "step": 1092 }, { "epoch": 0.019979161716050964, "grad_norm": 7.533720389804396, "learning_rate": 6.653465346534654e-06, "loss": 18.2497, "step": 1093 }, { "epoch": 0.019997440912497486, "grad_norm": 9.298600283610812, "learning_rate": 6.659558263518661e-06, "loss": 18.5542, "step": 1094 }, { "epoch": 0.02001572010894401, "grad_norm": 7.740774888213946, "learning_rate": 6.665651180502666e-06, "loss": 18.2266, "step": 1095 }, { "epoch": 0.020033999305390536, "grad_norm": 8.909679220515974, "learning_rate": 6.671744097486672e-06, "loss": 18.6344, "step": 1096 }, { "epoch": 0.020052278501837058, "grad_norm": 7.63494248931986, "learning_rate": 6.677837014470678e-06, "loss": 17.5885, "step": 1097 }, { "epoch": 0.020070557698283583, "grad_norm": 8.920698496042283, "learning_rate": 6.683929931454685e-06, "loss": 18.087, "step": 1098 }, { "epoch": 0.020088836894730108, "grad_norm": 7.6970425992182765, "learning_rate": 6.690022848438691e-06, "loss": 17.8351, "step": 1099 }, { "epoch": 0.020107116091176633, "grad_norm": 9.550901562462453, "learning_rate": 6.696115765422697e-06, "loss": 18.9141, "step": 1100 }, { "epoch": 0.020125395287623155, "grad_norm": 7.903962156781939, "learning_rate": 6.702208682406702e-06, "loss": 17.9881, "step": 1101 }, { "epoch": 0.02014367448406968, "grad_norm": 9.009575604456296, "learning_rate": 6.7083015993907085e-06, "loss": 18.5816, "step": 1102 }, { "epoch": 0.020161953680516206, "grad_norm": 8.063362266384035, "learning_rate": 6.714394516374715e-06, "loss": 17.9211, "step": 1103 }, { "epoch": 0.020180232876962727, "grad_norm": 7.689532167015797, "learning_rate": 6.720487433358721e-06, "loss": 17.8806, "step": 1104 }, { "epoch": 0.020198512073409253, "grad_norm": 8.370291445199264, "learning_rate": 6.7265803503427275e-06, "loss": 18.36, "step": 1105 }, { "epoch": 0.020216791269855778, "grad_norm": 7.660706061027755, "learning_rate": 6.732673267326733e-06, "loss": 18.0976, "step": 1106 }, { "epoch": 0.0202350704663023, "grad_norm": 7.696634048353973, "learning_rate": 6.738766184310739e-06, "loss": 17.7352, "step": 1107 }, { "epoch": 0.020253349662748825, "grad_norm": 14.379213048417702, "learning_rate": 6.744859101294745e-06, "loss": 17.7067, "step": 1108 }, { "epoch": 0.02027162885919535, "grad_norm": 8.662425910853875, "learning_rate": 6.750952018278751e-06, "loss": 18.3063, "step": 1109 }, { "epoch": 0.020289908055641875, "grad_norm": 7.891443885305967, "learning_rate": 6.757044935262758e-06, "loss": 17.7426, "step": 1110 }, { "epoch": 0.020308187252088397, "grad_norm": 7.881527662289122, "learning_rate": 6.763137852246764e-06, "loss": 18.2592, "step": 1111 }, { "epoch": 0.020326466448534922, "grad_norm": 6.824244603799724, "learning_rate": 6.76923076923077e-06, "loss": 17.5405, "step": 1112 }, { "epoch": 0.020344745644981448, "grad_norm": 6.955748008576041, "learning_rate": 6.775323686214776e-06, "loss": 17.4328, "step": 1113 }, { "epoch": 0.02036302484142797, "grad_norm": 8.531623209320305, "learning_rate": 6.781416603198782e-06, "loss": 18.531, "step": 1114 }, { "epoch": 0.020381304037874495, "grad_norm": 7.793085602064876, "learning_rate": 6.787509520182788e-06, "loss": 17.8731, "step": 1115 }, { "epoch": 0.02039958323432102, "grad_norm": 8.83908926012251, "learning_rate": 6.793602437166794e-06, "loss": 17.9839, "step": 1116 }, { "epoch": 0.020417862430767545, "grad_norm": 9.225053276907715, "learning_rate": 6.7996953541508e-06, "loss": 18.1455, "step": 1117 }, { "epoch": 0.020436141627214067, "grad_norm": 8.482282192574337, "learning_rate": 6.805788271134807e-06, "loss": 18.2174, "step": 1118 }, { "epoch": 0.020454420823660592, "grad_norm": 8.932118579503443, "learning_rate": 6.811881188118813e-06, "loss": 18.3675, "step": 1119 }, { "epoch": 0.020472700020107117, "grad_norm": 7.505436002781391, "learning_rate": 6.817974105102818e-06, "loss": 18.0324, "step": 1120 }, { "epoch": 0.02049097921655364, "grad_norm": 6.9349703359748585, "learning_rate": 6.824067022086825e-06, "loss": 17.7132, "step": 1121 }, { "epoch": 0.020509258413000164, "grad_norm": 8.981324443612397, "learning_rate": 6.8301599390708305e-06, "loss": 18.2598, "step": 1122 }, { "epoch": 0.02052753760944669, "grad_norm": 7.754558487822148, "learning_rate": 6.836252856054837e-06, "loss": 18.0601, "step": 1123 }, { "epoch": 0.02054581680589321, "grad_norm": 8.370429440767074, "learning_rate": 6.842345773038843e-06, "loss": 17.9774, "step": 1124 }, { "epoch": 0.020564096002339737, "grad_norm": 8.02602419251651, "learning_rate": 6.8484386900228495e-06, "loss": 17.9154, "step": 1125 }, { "epoch": 0.02058237519878626, "grad_norm": 7.650949652381696, "learning_rate": 6.854531607006854e-06, "loss": 17.8702, "step": 1126 }, { "epoch": 0.020600654395232787, "grad_norm": 8.204914255177334, "learning_rate": 6.860624523990861e-06, "loss": 18.0647, "step": 1127 }, { "epoch": 0.02061893359167931, "grad_norm": 6.855912112886448, "learning_rate": 6.866717440974867e-06, "loss": 17.48, "step": 1128 }, { "epoch": 0.020637212788125834, "grad_norm": 8.105338657042287, "learning_rate": 6.872810357958873e-06, "loss": 18.0055, "step": 1129 }, { "epoch": 0.02065549198457236, "grad_norm": 16.77405238000618, "learning_rate": 6.87890327494288e-06, "loss": 18.3776, "step": 1130 }, { "epoch": 0.02067377118101888, "grad_norm": 6.822106523614206, "learning_rate": 6.884996191926886e-06, "loss": 17.4504, "step": 1131 }, { "epoch": 0.020692050377465406, "grad_norm": 7.702096099615508, "learning_rate": 6.8910891089108915e-06, "loss": 17.686, "step": 1132 }, { "epoch": 0.02071032957391193, "grad_norm": 8.577549907942483, "learning_rate": 6.897182025894897e-06, "loss": 18.5179, "step": 1133 }, { "epoch": 0.020728608770358457, "grad_norm": 77.81058128614599, "learning_rate": 6.903274942878904e-06, "loss": 18.0846, "step": 1134 }, { "epoch": 0.02074688796680498, "grad_norm": 7.777221959468819, "learning_rate": 6.90936785986291e-06, "loss": 17.982, "step": 1135 }, { "epoch": 0.020765167163251504, "grad_norm": 8.103358397861477, "learning_rate": 6.915460776846916e-06, "loss": 18.0749, "step": 1136 }, { "epoch": 0.02078344635969803, "grad_norm": 7.954324518176213, "learning_rate": 6.921553693830923e-06, "loss": 17.9536, "step": 1137 }, { "epoch": 0.02080172555614455, "grad_norm": 7.646304408043269, "learning_rate": 6.9276466108149285e-06, "loss": 17.7323, "step": 1138 }, { "epoch": 0.020820004752591076, "grad_norm": 8.764899829664277, "learning_rate": 6.933739527798934e-06, "loss": 17.9325, "step": 1139 }, { "epoch": 0.0208382839490376, "grad_norm": 8.90169081801138, "learning_rate": 6.93983244478294e-06, "loss": 18.2958, "step": 1140 }, { "epoch": 0.020856563145484123, "grad_norm": 8.411302997086182, "learning_rate": 6.945925361766947e-06, "loss": 18.0948, "step": 1141 }, { "epoch": 0.020874842341930648, "grad_norm": 8.049056501337239, "learning_rate": 6.9520182787509524e-06, "loss": 18.2222, "step": 1142 }, { "epoch": 0.020893121538377173, "grad_norm": 7.323621447312056, "learning_rate": 6.958111195734959e-06, "loss": 17.8308, "step": 1143 }, { "epoch": 0.0209114007348237, "grad_norm": 8.592023496690768, "learning_rate": 6.964204112718966e-06, "loss": 18.1117, "step": 1144 }, { "epoch": 0.02092967993127022, "grad_norm": 7.995621972747377, "learning_rate": 6.9702970297029706e-06, "loss": 17.7442, "step": 1145 }, { "epoch": 0.020947959127716746, "grad_norm": 6.8862191468237945, "learning_rate": 6.976389946686976e-06, "loss": 17.4325, "step": 1146 }, { "epoch": 0.02096623832416327, "grad_norm": 8.536205187859835, "learning_rate": 6.982482863670983e-06, "loss": 18.3987, "step": 1147 }, { "epoch": 0.020984517520609793, "grad_norm": 7.969283065900971, "learning_rate": 6.9885757806549895e-06, "loss": 18.0284, "step": 1148 }, { "epoch": 0.021002796717056318, "grad_norm": 8.109090041444265, "learning_rate": 6.994668697638995e-06, "loss": 17.8368, "step": 1149 }, { "epoch": 0.021021075913502843, "grad_norm": 8.094394126698537, "learning_rate": 7.000761614623002e-06, "loss": 17.8873, "step": 1150 }, { "epoch": 0.021039355109949368, "grad_norm": 7.159650139988644, "learning_rate": 7.006854531607007e-06, "loss": 17.7425, "step": 1151 }, { "epoch": 0.02105763430639589, "grad_norm": 8.30888924591329, "learning_rate": 7.012947448591013e-06, "loss": 17.8527, "step": 1152 }, { "epoch": 0.021075913502842415, "grad_norm": 8.820744459819323, "learning_rate": 7.019040365575019e-06, "loss": 18.3725, "step": 1153 }, { "epoch": 0.02109419269928894, "grad_norm": 8.35480812143568, "learning_rate": 7.025133282559026e-06, "loss": 18.1813, "step": 1154 }, { "epoch": 0.021112471895735462, "grad_norm": 8.036912715992331, "learning_rate": 7.031226199543032e-06, "loss": 17.8396, "step": 1155 }, { "epoch": 0.021130751092181987, "grad_norm": 7.277613895898985, "learning_rate": 7.037319116527038e-06, "loss": 17.5703, "step": 1156 }, { "epoch": 0.021149030288628513, "grad_norm": 8.160707335936069, "learning_rate": 7.043412033511045e-06, "loss": 17.9347, "step": 1157 }, { "epoch": 0.021167309485075034, "grad_norm": 8.324350963433098, "learning_rate": 7.04950495049505e-06, "loss": 17.9741, "step": 1158 }, { "epoch": 0.02118558868152156, "grad_norm": 8.313499862915545, "learning_rate": 7.055597867479056e-06, "loss": 17.9326, "step": 1159 }, { "epoch": 0.021203867877968085, "grad_norm": 8.373934244972215, "learning_rate": 7.061690784463062e-06, "loss": 18.0119, "step": 1160 }, { "epoch": 0.02122214707441461, "grad_norm": 8.734489214418272, "learning_rate": 7.067783701447069e-06, "loss": 18.5876, "step": 1161 }, { "epoch": 0.021240426270861132, "grad_norm": 7.940739926024548, "learning_rate": 7.073876618431074e-06, "loss": 17.9932, "step": 1162 }, { "epoch": 0.021258705467307657, "grad_norm": 7.643890938802826, "learning_rate": 7.079969535415081e-06, "loss": 17.6314, "step": 1163 }, { "epoch": 0.021276984663754182, "grad_norm": 7.541401411005756, "learning_rate": 7.086062452399086e-06, "loss": 17.9616, "step": 1164 }, { "epoch": 0.021295263860200704, "grad_norm": 9.213679865116323, "learning_rate": 7.0921553693830925e-06, "loss": 18.9202, "step": 1165 }, { "epoch": 0.02131354305664723, "grad_norm": 8.511693300693606, "learning_rate": 7.098248286367099e-06, "loss": 18.196, "step": 1166 }, { "epoch": 0.021331822253093755, "grad_norm": 7.7357627986481985, "learning_rate": 7.104341203351105e-06, "loss": 18.066, "step": 1167 }, { "epoch": 0.02135010144954028, "grad_norm": 8.057760559908589, "learning_rate": 7.1104341203351115e-06, "loss": 18.0976, "step": 1168 }, { "epoch": 0.0213683806459868, "grad_norm": 8.785475163168408, "learning_rate": 7.116527037319117e-06, "loss": 18.1866, "step": 1169 }, { "epoch": 0.021386659842433327, "grad_norm": 7.4913324178511225, "learning_rate": 7.122619954303123e-06, "loss": 17.7044, "step": 1170 }, { "epoch": 0.021404939038879852, "grad_norm": 7.681956106312898, "learning_rate": 7.128712871287129e-06, "loss": 17.8684, "step": 1171 }, { "epoch": 0.021423218235326374, "grad_norm": 7.4616185648565905, "learning_rate": 7.134805788271135e-06, "loss": 17.7606, "step": 1172 }, { "epoch": 0.0214414974317729, "grad_norm": 7.119361393828716, "learning_rate": 7.140898705255141e-06, "loss": 17.7639, "step": 1173 }, { "epoch": 0.021459776628219424, "grad_norm": 9.123788086352594, "learning_rate": 7.146991622239148e-06, "loss": 18.2655, "step": 1174 }, { "epoch": 0.021478055824665946, "grad_norm": 9.977952496483601, "learning_rate": 7.153084539223154e-06, "loss": 18.1025, "step": 1175 }, { "epoch": 0.02149633502111247, "grad_norm": 8.010184469210712, "learning_rate": 7.15917745620716e-06, "loss": 18.0235, "step": 1176 }, { "epoch": 0.021514614217558996, "grad_norm": 7.340378645183165, "learning_rate": 7.165270373191166e-06, "loss": 17.7148, "step": 1177 }, { "epoch": 0.02153289341400552, "grad_norm": 8.10655777724448, "learning_rate": 7.171363290175172e-06, "loss": 18.1131, "step": 1178 }, { "epoch": 0.021551172610452043, "grad_norm": 6.828081387443637, "learning_rate": 7.177456207159178e-06, "loss": 17.4375, "step": 1179 }, { "epoch": 0.02156945180689857, "grad_norm": 8.552928156945542, "learning_rate": 7.183549124143184e-06, "loss": 18.1215, "step": 1180 }, { "epoch": 0.021587731003345094, "grad_norm": 7.566082042425317, "learning_rate": 7.1896420411271906e-06, "loss": 17.7592, "step": 1181 }, { "epoch": 0.021606010199791616, "grad_norm": 8.748982056419061, "learning_rate": 7.195734958111197e-06, "loss": 18.2236, "step": 1182 }, { "epoch": 0.02162428939623814, "grad_norm": 7.85067598085476, "learning_rate": 7.201827875095202e-06, "loss": 17.6095, "step": 1183 }, { "epoch": 0.021642568592684666, "grad_norm": 7.498161013542759, "learning_rate": 7.207920792079208e-06, "loss": 17.9093, "step": 1184 }, { "epoch": 0.02166084778913119, "grad_norm": 7.768815471400683, "learning_rate": 7.2140137090632144e-06, "loss": 17.9479, "step": 1185 }, { "epoch": 0.021679126985577713, "grad_norm": 8.335391937231623, "learning_rate": 7.220106626047221e-06, "loss": 18.1221, "step": 1186 }, { "epoch": 0.02169740618202424, "grad_norm": 7.984945971046457, "learning_rate": 7.226199543031227e-06, "loss": 18.1438, "step": 1187 }, { "epoch": 0.021715685378470764, "grad_norm": 9.06770301142076, "learning_rate": 7.232292460015233e-06, "loss": 18.6921, "step": 1188 }, { "epoch": 0.021733964574917285, "grad_norm": 8.037666318852676, "learning_rate": 7.238385376999238e-06, "loss": 18.3687, "step": 1189 }, { "epoch": 0.02175224377136381, "grad_norm": 7.792236263999609, "learning_rate": 7.244478293983245e-06, "loss": 18.0255, "step": 1190 }, { "epoch": 0.021770522967810336, "grad_norm": 8.645863994009908, "learning_rate": 7.250571210967251e-06, "loss": 18.3111, "step": 1191 }, { "epoch": 0.021788802164256858, "grad_norm": 8.891587503165612, "learning_rate": 7.256664127951257e-06, "loss": 18.6329, "step": 1192 }, { "epoch": 0.021807081360703383, "grad_norm": 7.7969448972465125, "learning_rate": 7.262757044935264e-06, "loss": 17.9866, "step": 1193 }, { "epoch": 0.021825360557149908, "grad_norm": 9.036182301496826, "learning_rate": 7.26884996191927e-06, "loss": 18.2366, "step": 1194 }, { "epoch": 0.021843639753596433, "grad_norm": 7.9134513330621825, "learning_rate": 7.274942878903275e-06, "loss": 18.3917, "step": 1195 }, { "epoch": 0.021861918950042955, "grad_norm": 10.12767509358225, "learning_rate": 7.281035795887281e-06, "loss": 18.3437, "step": 1196 }, { "epoch": 0.02188019814648948, "grad_norm": 9.349416412034195, "learning_rate": 7.287128712871288e-06, "loss": 18.2855, "step": 1197 }, { "epoch": 0.021898477342936006, "grad_norm": 8.686163963468262, "learning_rate": 7.2932216298552935e-06, "loss": 18.1963, "step": 1198 }, { "epoch": 0.021916756539382527, "grad_norm": 8.215999122820618, "learning_rate": 7.2993145468393e-06, "loss": 18.0912, "step": 1199 }, { "epoch": 0.021935035735829053, "grad_norm": 8.909752446449565, "learning_rate": 7.305407463823307e-06, "loss": 18.4349, "step": 1200 }, { "epoch": 0.021953314932275578, "grad_norm": 7.864052924827168, "learning_rate": 7.3115003808073125e-06, "loss": 18.1708, "step": 1201 }, { "epoch": 0.021971594128722103, "grad_norm": 7.975719874675216, "learning_rate": 7.317593297791317e-06, "loss": 17.9927, "step": 1202 }, { "epoch": 0.021989873325168625, "grad_norm": 8.06241978310815, "learning_rate": 7.323686214775324e-06, "loss": 18.1951, "step": 1203 }, { "epoch": 0.02200815252161515, "grad_norm": 8.61763658655989, "learning_rate": 7.329779131759331e-06, "loss": 18.4821, "step": 1204 }, { "epoch": 0.022026431718061675, "grad_norm": 8.610045656208115, "learning_rate": 7.335872048743336e-06, "loss": 18.4585, "step": 1205 }, { "epoch": 0.022044710914508197, "grad_norm": 7.1058283019046815, "learning_rate": 7.341964965727343e-06, "loss": 17.9557, "step": 1206 }, { "epoch": 0.022062990110954722, "grad_norm": 8.852871625404843, "learning_rate": 7.348057882711349e-06, "loss": 18.8266, "step": 1207 }, { "epoch": 0.022081269307401247, "grad_norm": 8.641928794327619, "learning_rate": 7.3541507996953545e-06, "loss": 18.5967, "step": 1208 }, { "epoch": 0.02209954850384777, "grad_norm": 8.118616842382611, "learning_rate": 7.36024371667936e-06, "loss": 17.9766, "step": 1209 }, { "epoch": 0.022117827700294294, "grad_norm": 7.8062309682541615, "learning_rate": 7.366336633663367e-06, "loss": 18.2301, "step": 1210 }, { "epoch": 0.02213610689674082, "grad_norm": 8.256561305103666, "learning_rate": 7.3724295506473735e-06, "loss": 18.2072, "step": 1211 }, { "epoch": 0.022154386093187345, "grad_norm": 7.541355836416142, "learning_rate": 7.378522467631379e-06, "loss": 17.8086, "step": 1212 }, { "epoch": 0.022172665289633867, "grad_norm": 8.373746914388754, "learning_rate": 7.384615384615386e-06, "loss": 18.2852, "step": 1213 }, { "epoch": 0.022190944486080392, "grad_norm": 6.529008972161452, "learning_rate": 7.390708301599391e-06, "loss": 17.2029, "step": 1214 }, { "epoch": 0.022209223682526917, "grad_norm": 8.548302012635622, "learning_rate": 7.396801218583397e-06, "loss": 18.3498, "step": 1215 }, { "epoch": 0.02222750287897344, "grad_norm": 9.300415300136658, "learning_rate": 7.402894135567403e-06, "loss": 18.4759, "step": 1216 }, { "epoch": 0.022245782075419964, "grad_norm": 7.982304795140464, "learning_rate": 7.40898705255141e-06, "loss": 18.0739, "step": 1217 }, { "epoch": 0.02226406127186649, "grad_norm": 6.895074547057548, "learning_rate": 7.4150799695354155e-06, "loss": 17.5846, "step": 1218 }, { "epoch": 0.022282340468313015, "grad_norm": 9.15493873323251, "learning_rate": 7.421172886519422e-06, "loss": 18.7379, "step": 1219 }, { "epoch": 0.022300619664759536, "grad_norm": 7.37272617147564, "learning_rate": 7.427265803503429e-06, "loss": 17.6261, "step": 1220 }, { "epoch": 0.02231889886120606, "grad_norm": 8.105896696615837, "learning_rate": 7.433358720487434e-06, "loss": 18.3197, "step": 1221 }, { "epoch": 0.022337178057652587, "grad_norm": 6.971224136969124, "learning_rate": 7.43945163747144e-06, "loss": 17.5691, "step": 1222 }, { "epoch": 0.02235545725409911, "grad_norm": 7.7229301300845155, "learning_rate": 7.445544554455446e-06, "loss": 17.9763, "step": 1223 }, { "epoch": 0.022373736450545634, "grad_norm": 8.79029937572499, "learning_rate": 7.4516374714394526e-06, "loss": 18.6975, "step": 1224 }, { "epoch": 0.02239201564699216, "grad_norm": 7.653051662866822, "learning_rate": 7.457730388423458e-06, "loss": 17.8637, "step": 1225 }, { "epoch": 0.02241029484343868, "grad_norm": 10.015118294113028, "learning_rate": 7.463823305407465e-06, "loss": 18.3986, "step": 1226 }, { "epoch": 0.022428574039885206, "grad_norm": 9.135396156077341, "learning_rate": 7.46991622239147e-06, "loss": 18.1684, "step": 1227 }, { "epoch": 0.02244685323633173, "grad_norm": 8.840419125171318, "learning_rate": 7.4760091393754765e-06, "loss": 18.5559, "step": 1228 }, { "epoch": 0.022465132432778256, "grad_norm": 8.685221814751454, "learning_rate": 7.482102056359482e-06, "loss": 17.9879, "step": 1229 }, { "epoch": 0.022483411629224778, "grad_norm": 7.582756705088697, "learning_rate": 7.488194973343489e-06, "loss": 17.8319, "step": 1230 }, { "epoch": 0.022501690825671303, "grad_norm": 9.55332474929589, "learning_rate": 7.4942878903274954e-06, "loss": 18.5795, "step": 1231 }, { "epoch": 0.02251997002211783, "grad_norm": 9.12537600125327, "learning_rate": 7.500380807311501e-06, "loss": 18.5816, "step": 1232 }, { "epoch": 0.02253824921856435, "grad_norm": 8.200056774096385, "learning_rate": 7.506473724295507e-06, "loss": 17.9549, "step": 1233 }, { "epoch": 0.022556528415010876, "grad_norm": 8.269221292204605, "learning_rate": 7.512566641279513e-06, "loss": 17.793, "step": 1234 }, { "epoch": 0.0225748076114574, "grad_norm": 6.9529769475855785, "learning_rate": 7.518659558263519e-06, "loss": 17.6142, "step": 1235 }, { "epoch": 0.022593086807903926, "grad_norm": 9.460123773145922, "learning_rate": 7.524752475247525e-06, "loss": 18.5836, "step": 1236 }, { "epoch": 0.022611366004350448, "grad_norm": 6.935214348736729, "learning_rate": 7.530845392231532e-06, "loss": 17.5914, "step": 1237 }, { "epoch": 0.022629645200796973, "grad_norm": 8.38004457254756, "learning_rate": 7.536938309215538e-06, "loss": 18.0831, "step": 1238 }, { "epoch": 0.0226479243972435, "grad_norm": 7.3943109247066, "learning_rate": 7.543031226199544e-06, "loss": 17.8206, "step": 1239 }, { "epoch": 0.02266620359369002, "grad_norm": 7.861977798654075, "learning_rate": 7.549124143183549e-06, "loss": 17.8095, "step": 1240 }, { "epoch": 0.022684482790136545, "grad_norm": 8.081413578470416, "learning_rate": 7.5552170601675556e-06, "loss": 18.0777, "step": 1241 }, { "epoch": 0.02270276198658307, "grad_norm": 8.227747563013045, "learning_rate": 7.561309977151562e-06, "loss": 18.1766, "step": 1242 }, { "epoch": 0.022721041183029592, "grad_norm": 8.080913904692055, "learning_rate": 7.567402894135568e-06, "loss": 17.864, "step": 1243 }, { "epoch": 0.022739320379476118, "grad_norm": 8.179396165410159, "learning_rate": 7.5734958111195745e-06, "loss": 17.9948, "step": 1244 }, { "epoch": 0.022757599575922643, "grad_norm": 9.386646441003995, "learning_rate": 7.579588728103581e-06, "loss": 18.8239, "step": 1245 }, { "epoch": 0.022775878772369168, "grad_norm": 8.975542711309226, "learning_rate": 7.585681645087586e-06, "loss": 18.5923, "step": 1246 }, { "epoch": 0.02279415796881569, "grad_norm": 8.844141143227652, "learning_rate": 7.591774562071592e-06, "loss": 18.1258, "step": 1247 }, { "epoch": 0.022812437165262215, "grad_norm": 7.184291908214676, "learning_rate": 7.597867479055598e-06, "loss": 17.7494, "step": 1248 }, { "epoch": 0.02283071636170874, "grad_norm": 7.973705351159692, "learning_rate": 7.603960396039605e-06, "loss": 17.9706, "step": 1249 }, { "epoch": 0.022848995558155262, "grad_norm": 7.011117924638664, "learning_rate": 7.610053313023611e-06, "loss": 17.6627, "step": 1250 }, { "epoch": 0.022867274754601787, "grad_norm": 6.581057991386631, "learning_rate": 7.616146230007617e-06, "loss": 17.6904, "step": 1251 }, { "epoch": 0.022885553951048312, "grad_norm": 8.604049446250782, "learning_rate": 7.622239146991622e-06, "loss": 18.4233, "step": 1252 }, { "epoch": 0.022903833147494838, "grad_norm": 6.804372320887771, "learning_rate": 7.628332063975629e-06, "loss": 17.3679, "step": 1253 }, { "epoch": 0.02292211234394136, "grad_norm": 8.276516712834422, "learning_rate": 7.634424980959635e-06, "loss": 18.0717, "step": 1254 }, { "epoch": 0.022940391540387885, "grad_norm": 8.808370707379996, "learning_rate": 7.640517897943641e-06, "loss": 18.0482, "step": 1255 }, { "epoch": 0.02295867073683441, "grad_norm": 7.882389237429789, "learning_rate": 7.646610814927647e-06, "loss": 17.8727, "step": 1256 }, { "epoch": 0.02297694993328093, "grad_norm": 8.213135962682458, "learning_rate": 7.652703731911654e-06, "loss": 18.0153, "step": 1257 }, { "epoch": 0.022995229129727457, "grad_norm": 8.466431958181476, "learning_rate": 7.658796648895659e-06, "loss": 18.008, "step": 1258 }, { "epoch": 0.023013508326173982, "grad_norm": 7.234200452168259, "learning_rate": 7.664889565879666e-06, "loss": 17.4817, "step": 1259 }, { "epoch": 0.023031787522620504, "grad_norm": 9.59877550737015, "learning_rate": 7.670982482863672e-06, "loss": 18.6498, "step": 1260 }, { "epoch": 0.02305006671906703, "grad_norm": 8.341860614988887, "learning_rate": 7.677075399847677e-06, "loss": 18.2708, "step": 1261 }, { "epoch": 0.023068345915513554, "grad_norm": 9.281859020029916, "learning_rate": 7.683168316831683e-06, "loss": 18.9397, "step": 1262 }, { "epoch": 0.02308662511196008, "grad_norm": 7.710288194975174, "learning_rate": 7.68926123381569e-06, "loss": 17.9752, "step": 1263 }, { "epoch": 0.0231049043084066, "grad_norm": 7.063195913746644, "learning_rate": 7.695354150799696e-06, "loss": 17.487, "step": 1264 }, { "epoch": 0.023123183504853127, "grad_norm": 7.9920256770402345, "learning_rate": 7.701447067783702e-06, "loss": 17.8697, "step": 1265 }, { "epoch": 0.023141462701299652, "grad_norm": 8.164032651953145, "learning_rate": 7.707539984767708e-06, "loss": 18.4767, "step": 1266 }, { "epoch": 0.023159741897746174, "grad_norm": 9.071888596238017, "learning_rate": 7.713632901751714e-06, "loss": 18.0909, "step": 1267 }, { "epoch": 0.0231780210941927, "grad_norm": 12.59211531240348, "learning_rate": 7.719725818735721e-06, "loss": 18.4718, "step": 1268 }, { "epoch": 0.023196300290639224, "grad_norm": 7.991050919837849, "learning_rate": 7.725818735719727e-06, "loss": 18.1672, "step": 1269 }, { "epoch": 0.02321457948708575, "grad_norm": 7.370297221569767, "learning_rate": 7.731911652703733e-06, "loss": 17.7813, "step": 1270 }, { "epoch": 0.02323285868353227, "grad_norm": 7.807076685784157, "learning_rate": 7.738004569687738e-06, "loss": 17.8473, "step": 1271 }, { "epoch": 0.023251137879978796, "grad_norm": 7.309326311800068, "learning_rate": 7.744097486671744e-06, "loss": 17.7814, "step": 1272 }, { "epoch": 0.02326941707642532, "grad_norm": 7.143892484378977, "learning_rate": 7.75019040365575e-06, "loss": 17.8614, "step": 1273 }, { "epoch": 0.023287696272871843, "grad_norm": 8.334417705202664, "learning_rate": 7.756283320639757e-06, "loss": 17.9317, "step": 1274 }, { "epoch": 0.02330597546931837, "grad_norm": 8.737528222459407, "learning_rate": 7.762376237623763e-06, "loss": 18.4585, "step": 1275 }, { "epoch": 0.023324254665764894, "grad_norm": 7.829535864184691, "learning_rate": 7.768469154607769e-06, "loss": 18.0296, "step": 1276 }, { "epoch": 0.023342533862211415, "grad_norm": 7.179278938725156, "learning_rate": 7.774562071591775e-06, "loss": 17.659, "step": 1277 }, { "epoch": 0.02336081305865794, "grad_norm": 8.395295598169197, "learning_rate": 7.78065498857578e-06, "loss": 18.4474, "step": 1278 }, { "epoch": 0.023379092255104466, "grad_norm": 8.107695250173576, "learning_rate": 7.786747905559788e-06, "loss": 18.3523, "step": 1279 }, { "epoch": 0.02339737145155099, "grad_norm": 8.02572642939791, "learning_rate": 7.792840822543794e-06, "loss": 18.0061, "step": 1280 }, { "epoch": 0.023415650647997513, "grad_norm": 8.54594362762417, "learning_rate": 7.7989337395278e-06, "loss": 18.2185, "step": 1281 }, { "epoch": 0.023433929844444038, "grad_norm": 7.036244253198506, "learning_rate": 7.805026656511805e-06, "loss": 17.6652, "step": 1282 }, { "epoch": 0.023452209040890563, "grad_norm": 8.064019494416113, "learning_rate": 7.811119573495813e-06, "loss": 18.0908, "step": 1283 }, { "epoch": 0.023470488237337085, "grad_norm": 8.372108528772818, "learning_rate": 7.817212490479817e-06, "loss": 18.4715, "step": 1284 }, { "epoch": 0.02348876743378361, "grad_norm": 9.683067868413996, "learning_rate": 7.823305407463824e-06, "loss": 19.0494, "step": 1285 }, { "epoch": 0.023507046630230136, "grad_norm": 8.750064171301153, "learning_rate": 7.82939832444783e-06, "loss": 18.3841, "step": 1286 }, { "epoch": 0.02352532582667666, "grad_norm": 8.621107473939952, "learning_rate": 7.835491241431836e-06, "loss": 18.4852, "step": 1287 }, { "epoch": 0.023543605023123183, "grad_norm": 7.6179221400081385, "learning_rate": 7.841584158415843e-06, "loss": 17.8382, "step": 1288 }, { "epoch": 0.023561884219569708, "grad_norm": 7.162638702613578, "learning_rate": 7.847677075399849e-06, "loss": 17.9635, "step": 1289 }, { "epoch": 0.023580163416016233, "grad_norm": 7.740343557998449, "learning_rate": 7.853769992383855e-06, "loss": 18.1045, "step": 1290 }, { "epoch": 0.023598442612462755, "grad_norm": 7.856847072877854, "learning_rate": 7.85986290936786e-06, "loss": 17.9265, "step": 1291 }, { "epoch": 0.02361672180890928, "grad_norm": 7.819128359976211, "learning_rate": 7.865955826351866e-06, "loss": 18.0816, "step": 1292 }, { "epoch": 0.023635001005355805, "grad_norm": 6.68728879211887, "learning_rate": 7.872048743335872e-06, "loss": 17.2439, "step": 1293 }, { "epoch": 0.023653280201802327, "grad_norm": 7.637521824211896, "learning_rate": 7.87814166031988e-06, "loss": 17.8988, "step": 1294 }, { "epoch": 0.023671559398248852, "grad_norm": 9.032365359032385, "learning_rate": 7.884234577303885e-06, "loss": 18.6905, "step": 1295 }, { "epoch": 0.023689838594695378, "grad_norm": 8.251170325901612, "learning_rate": 7.890327494287891e-06, "loss": 18.1274, "step": 1296 }, { "epoch": 0.023708117791141903, "grad_norm": 7.744327318199243, "learning_rate": 7.896420411271897e-06, "loss": 17.9762, "step": 1297 }, { "epoch": 0.023726396987588425, "grad_norm": 7.5883481159425665, "learning_rate": 7.902513328255902e-06, "loss": 17.6878, "step": 1298 }, { "epoch": 0.02374467618403495, "grad_norm": 7.853608917102702, "learning_rate": 7.90860624523991e-06, "loss": 18.0944, "step": 1299 }, { "epoch": 0.023762955380481475, "grad_norm": 7.702152236003674, "learning_rate": 7.914699162223916e-06, "loss": 17.8327, "step": 1300 }, { "epoch": 0.023781234576927997, "grad_norm": 8.376841760535141, "learning_rate": 7.920792079207921e-06, "loss": 18.306, "step": 1301 }, { "epoch": 0.023799513773374522, "grad_norm": 8.309740430141177, "learning_rate": 7.926884996191929e-06, "loss": 17.7201, "step": 1302 }, { "epoch": 0.023817792969821047, "grad_norm": 7.3776967431244005, "learning_rate": 7.932977913175933e-06, "loss": 17.6082, "step": 1303 }, { "epoch": 0.023836072166267572, "grad_norm": 8.479560489220585, "learning_rate": 7.93907083015994e-06, "loss": 18.1974, "step": 1304 }, { "epoch": 0.023854351362714094, "grad_norm": 7.627994332940635, "learning_rate": 7.945163747143946e-06, "loss": 17.9485, "step": 1305 }, { "epoch": 0.02387263055916062, "grad_norm": 7.129336933906195, "learning_rate": 7.951256664127952e-06, "loss": 17.7024, "step": 1306 }, { "epoch": 0.023890909755607145, "grad_norm": 10.21498902994689, "learning_rate": 7.957349581111958e-06, "loss": 18.6435, "step": 1307 }, { "epoch": 0.023909188952053666, "grad_norm": 8.606824128185288, "learning_rate": 7.963442498095965e-06, "loss": 18.3299, "step": 1308 }, { "epoch": 0.02392746814850019, "grad_norm": 8.712980733390193, "learning_rate": 7.969535415079969e-06, "loss": 18.6535, "step": 1309 }, { "epoch": 0.023945747344946717, "grad_norm": 7.552296265392005, "learning_rate": 7.975628332063977e-06, "loss": 17.997, "step": 1310 }, { "epoch": 0.02396402654139324, "grad_norm": 8.348030354934505, "learning_rate": 7.981721249047982e-06, "loss": 18.0706, "step": 1311 }, { "epoch": 0.023982305737839764, "grad_norm": 8.030722009723371, "learning_rate": 7.987814166031988e-06, "loss": 18.0331, "step": 1312 }, { "epoch": 0.02400058493428629, "grad_norm": 8.09434320280789, "learning_rate": 7.993907083015996e-06, "loss": 18.4149, "step": 1313 }, { "epoch": 0.024018864130732814, "grad_norm": 8.1395626020549, "learning_rate": 8.000000000000001e-06, "loss": 18.3051, "step": 1314 }, { "epoch": 0.024037143327179336, "grad_norm": 8.052697051283777, "learning_rate": 8.006092916984007e-06, "loss": 18.0066, "step": 1315 }, { "epoch": 0.02405542252362586, "grad_norm": 7.117663298312368, "learning_rate": 8.012185833968013e-06, "loss": 17.6996, "step": 1316 }, { "epoch": 0.024073701720072387, "grad_norm": 7.712078554983164, "learning_rate": 8.018278750952019e-06, "loss": 17.8507, "step": 1317 }, { "epoch": 0.02409198091651891, "grad_norm": 8.428532674109862, "learning_rate": 8.024371667936024e-06, "loss": 18.1838, "step": 1318 }, { "epoch": 0.024110260112965434, "grad_norm": 7.979458394015958, "learning_rate": 8.030464584920032e-06, "loss": 17.9062, "step": 1319 }, { "epoch": 0.02412853930941196, "grad_norm": 8.53801088058669, "learning_rate": 8.036557501904038e-06, "loss": 18.3146, "step": 1320 }, { "epoch": 0.024146818505858484, "grad_norm": 8.362033362045155, "learning_rate": 8.042650418888043e-06, "loss": 18.0613, "step": 1321 }, { "epoch": 0.024165097702305006, "grad_norm": 7.526317721252609, "learning_rate": 8.048743335872049e-06, "loss": 17.9492, "step": 1322 }, { "epoch": 0.02418337689875153, "grad_norm": 7.766073534985361, "learning_rate": 8.054836252856055e-06, "loss": 18.0612, "step": 1323 }, { "epoch": 0.024201656095198056, "grad_norm": 7.771387693349303, "learning_rate": 8.060929169840062e-06, "loss": 17.6484, "step": 1324 }, { "epoch": 0.024219935291644578, "grad_norm": 7.451722400646125, "learning_rate": 8.067022086824068e-06, "loss": 17.7878, "step": 1325 }, { "epoch": 0.024238214488091103, "grad_norm": 7.859467102333334, "learning_rate": 8.073115003808074e-06, "loss": 17.9367, "step": 1326 }, { "epoch": 0.02425649368453763, "grad_norm": 7.056189195058994, "learning_rate": 8.07920792079208e-06, "loss": 17.7121, "step": 1327 }, { "epoch": 0.02427477288098415, "grad_norm": 7.959798202564799, "learning_rate": 8.085300837776085e-06, "loss": 18.172, "step": 1328 }, { "epoch": 0.024293052077430675, "grad_norm": 9.09571371326946, "learning_rate": 8.091393754760091e-06, "loss": 18.5352, "step": 1329 }, { "epoch": 0.0243113312738772, "grad_norm": 9.120709054336443, "learning_rate": 8.097486671744099e-06, "loss": 18.2114, "step": 1330 }, { "epoch": 0.024329610470323726, "grad_norm": 9.68771610352554, "learning_rate": 8.103579588728104e-06, "loss": 18.6835, "step": 1331 }, { "epoch": 0.024347889666770248, "grad_norm": 8.156074313348503, "learning_rate": 8.10967250571211e-06, "loss": 18.0789, "step": 1332 }, { "epoch": 0.024366168863216773, "grad_norm": 7.477498680504085, "learning_rate": 8.115765422696118e-06, "loss": 17.538, "step": 1333 }, { "epoch": 0.024384448059663298, "grad_norm": 8.659555772193457, "learning_rate": 8.121858339680122e-06, "loss": 18.72, "step": 1334 }, { "epoch": 0.02440272725610982, "grad_norm": 7.384290456888504, "learning_rate": 8.127951256664129e-06, "loss": 17.8932, "step": 1335 }, { "epoch": 0.024421006452556345, "grad_norm": 7.994339782859948, "learning_rate": 8.134044173648135e-06, "loss": 17.8404, "step": 1336 }, { "epoch": 0.02443928564900287, "grad_norm": 7.180607066628015, "learning_rate": 8.14013709063214e-06, "loss": 17.462, "step": 1337 }, { "epoch": 0.024457564845449396, "grad_norm": 9.032330013580443, "learning_rate": 8.146230007616146e-06, "loss": 18.5001, "step": 1338 }, { "epoch": 0.024475844041895917, "grad_norm": 7.546251182125515, "learning_rate": 8.152322924600154e-06, "loss": 18.0689, "step": 1339 }, { "epoch": 0.024494123238342443, "grad_norm": 8.256495716099927, "learning_rate": 8.158415841584158e-06, "loss": 18.2256, "step": 1340 }, { "epoch": 0.024512402434788968, "grad_norm": 8.446449937335991, "learning_rate": 8.164508758568165e-06, "loss": 18.2305, "step": 1341 }, { "epoch": 0.02453068163123549, "grad_norm": 7.647762757269953, "learning_rate": 8.170601675552171e-06, "loss": 17.9599, "step": 1342 }, { "epoch": 0.024548960827682015, "grad_norm": 9.336594874256123, "learning_rate": 8.176694592536177e-06, "loss": 18.5839, "step": 1343 }, { "epoch": 0.02456724002412854, "grad_norm": 8.458582893372924, "learning_rate": 8.182787509520184e-06, "loss": 18.3154, "step": 1344 }, { "epoch": 0.024585519220575062, "grad_norm": 7.221096422825132, "learning_rate": 8.18888042650419e-06, "loss": 17.6017, "step": 1345 }, { "epoch": 0.024603798417021587, "grad_norm": 8.195830887342922, "learning_rate": 8.194973343488196e-06, "loss": 18.1285, "step": 1346 }, { "epoch": 0.024622077613468112, "grad_norm": 7.271801674324512, "learning_rate": 8.201066260472202e-06, "loss": 17.6622, "step": 1347 }, { "epoch": 0.024640356809914638, "grad_norm": 7.7027468671500054, "learning_rate": 8.207159177456207e-06, "loss": 17.8249, "step": 1348 }, { "epoch": 0.02465863600636116, "grad_norm": 7.360532133484001, "learning_rate": 8.213252094440213e-06, "loss": 17.363, "step": 1349 }, { "epoch": 0.024676915202807684, "grad_norm": 7.690719116058331, "learning_rate": 8.21934501142422e-06, "loss": 17.9928, "step": 1350 }, { "epoch": 0.02469519439925421, "grad_norm": 7.945431554099586, "learning_rate": 8.225437928408226e-06, "loss": 18.0503, "step": 1351 }, { "epoch": 0.02471347359570073, "grad_norm": 7.106647440398628, "learning_rate": 8.231530845392232e-06, "loss": 17.7759, "step": 1352 }, { "epoch": 0.024731752792147257, "grad_norm": 7.751453091434756, "learning_rate": 8.237623762376238e-06, "loss": 17.9833, "step": 1353 }, { "epoch": 0.024750031988593782, "grad_norm": 8.384614172000148, "learning_rate": 8.243716679360244e-06, "loss": 17.792, "step": 1354 }, { "epoch": 0.024768311185040307, "grad_norm": 7.531315262910314, "learning_rate": 8.249809596344251e-06, "loss": 17.7091, "step": 1355 }, { "epoch": 0.02478659038148683, "grad_norm": 7.387069015047229, "learning_rate": 8.255902513328257e-06, "loss": 17.9286, "step": 1356 }, { "epoch": 0.024804869577933354, "grad_norm": 6.941253945063988, "learning_rate": 8.261995430312262e-06, "loss": 17.6529, "step": 1357 }, { "epoch": 0.02482314877437988, "grad_norm": 7.435027662647396, "learning_rate": 8.26808834729627e-06, "loss": 17.9858, "step": 1358 }, { "epoch": 0.0248414279708264, "grad_norm": 7.562534840266457, "learning_rate": 8.274181264280274e-06, "loss": 17.7777, "step": 1359 }, { "epoch": 0.024859707167272926, "grad_norm": 7.631480061899964, "learning_rate": 8.28027418126428e-06, "loss": 17.7919, "step": 1360 }, { "epoch": 0.02487798636371945, "grad_norm": 7.871776632436914, "learning_rate": 8.286367098248287e-06, "loss": 17.9316, "step": 1361 }, { "epoch": 0.024896265560165973, "grad_norm": 6.821665850570862, "learning_rate": 8.292460015232293e-06, "loss": 17.5959, "step": 1362 }, { "epoch": 0.0249145447566125, "grad_norm": 8.166453099333848, "learning_rate": 8.298552932216299e-06, "loss": 17.8598, "step": 1363 }, { "epoch": 0.024932823953059024, "grad_norm": 9.331197609076883, "learning_rate": 8.304645849200306e-06, "loss": 18.1722, "step": 1364 }, { "epoch": 0.02495110314950555, "grad_norm": 8.276872570089795, "learning_rate": 8.310738766184312e-06, "loss": 18.0691, "step": 1365 }, { "epoch": 0.02496938234595207, "grad_norm": 7.337851108553668, "learning_rate": 8.316831683168318e-06, "loss": 17.4398, "step": 1366 }, { "epoch": 0.024987661542398596, "grad_norm": 8.59083660305102, "learning_rate": 8.322924600152323e-06, "loss": 18.1222, "step": 1367 }, { "epoch": 0.02500594073884512, "grad_norm": 9.118795882833696, "learning_rate": 8.32901751713633e-06, "loss": 18.1363, "step": 1368 }, { "epoch": 0.025024219935291643, "grad_norm": 8.371941378867396, "learning_rate": 8.335110434120337e-06, "loss": 18.2809, "step": 1369 }, { "epoch": 0.02504249913173817, "grad_norm": 7.641238868727458, "learning_rate": 8.341203351104342e-06, "loss": 18.1704, "step": 1370 }, { "epoch": 0.025060778328184694, "grad_norm": 9.042468938009703, "learning_rate": 8.347296268088348e-06, "loss": 18.3551, "step": 1371 }, { "epoch": 0.02507905752463122, "grad_norm": 9.506376707817328, "learning_rate": 8.353389185072354e-06, "loss": 18.368, "step": 1372 }, { "epoch": 0.02509733672107774, "grad_norm": 7.770551176630004, "learning_rate": 8.35948210205636e-06, "loss": 17.8866, "step": 1373 }, { "epoch": 0.025115615917524266, "grad_norm": 8.290447583037134, "learning_rate": 8.365575019040365e-06, "loss": 18.5078, "step": 1374 }, { "epoch": 0.02513389511397079, "grad_norm": 9.77723850031326, "learning_rate": 8.371667936024373e-06, "loss": 18.3157, "step": 1375 }, { "epoch": 0.025152174310417313, "grad_norm": 8.296936345506735, "learning_rate": 8.377760853008379e-06, "loss": 18.2976, "step": 1376 }, { "epoch": 0.025170453506863838, "grad_norm": 8.124263286385034, "learning_rate": 8.383853769992384e-06, "loss": 17.8877, "step": 1377 }, { "epoch": 0.025188732703310363, "grad_norm": 6.454023710336849, "learning_rate": 8.38994668697639e-06, "loss": 17.4242, "step": 1378 }, { "epoch": 0.025207011899756885, "grad_norm": 8.9444550386909, "learning_rate": 8.396039603960396e-06, "loss": 18.3085, "step": 1379 }, { "epoch": 0.02522529109620341, "grad_norm": 9.69406386868736, "learning_rate": 8.402132520944403e-06, "loss": 18.3456, "step": 1380 }, { "epoch": 0.025243570292649935, "grad_norm": 8.074897555646956, "learning_rate": 8.40822543792841e-06, "loss": 17.8998, "step": 1381 }, { "epoch": 0.02526184948909646, "grad_norm": 7.596511536029738, "learning_rate": 8.414318354912415e-06, "loss": 17.9059, "step": 1382 }, { "epoch": 0.025280128685542982, "grad_norm": 8.829738986897409, "learning_rate": 8.42041127189642e-06, "loss": 18.412, "step": 1383 }, { "epoch": 0.025298407881989508, "grad_norm": 8.174371688738475, "learning_rate": 8.426504188880426e-06, "loss": 17.8264, "step": 1384 }, { "epoch": 0.025316687078436033, "grad_norm": 7.676731543848915, "learning_rate": 8.432597105864432e-06, "loss": 17.9388, "step": 1385 }, { "epoch": 0.025334966274882555, "grad_norm": 10.247213204871045, "learning_rate": 8.43869002284844e-06, "loss": 18.6418, "step": 1386 }, { "epoch": 0.02535324547132908, "grad_norm": 8.571825620462517, "learning_rate": 8.444782939832445e-06, "loss": 18.4308, "step": 1387 }, { "epoch": 0.025371524667775605, "grad_norm": 7.6782801297365095, "learning_rate": 8.450875856816451e-06, "loss": 18.0378, "step": 1388 }, { "epoch": 0.02538980386422213, "grad_norm": 7.48276588353417, "learning_rate": 8.456968773800459e-06, "loss": 17.8743, "step": 1389 }, { "epoch": 0.025408083060668652, "grad_norm": 8.583044967740078, "learning_rate": 8.463061690784464e-06, "loss": 17.9977, "step": 1390 }, { "epoch": 0.025426362257115177, "grad_norm": 8.824243738339353, "learning_rate": 8.46915460776847e-06, "loss": 18.3367, "step": 1391 }, { "epoch": 0.025444641453561703, "grad_norm": 7.915063711593708, "learning_rate": 8.475247524752476e-06, "loss": 18.2498, "step": 1392 }, { "epoch": 0.025462920650008224, "grad_norm": 7.177028404247519, "learning_rate": 8.481340441736482e-06, "loss": 17.6313, "step": 1393 }, { "epoch": 0.02548119984645475, "grad_norm": 7.274675016163252, "learning_rate": 8.487433358720487e-06, "loss": 17.9967, "step": 1394 }, { "epoch": 0.025499479042901275, "grad_norm": 8.246748890142642, "learning_rate": 8.493526275704495e-06, "loss": 18.0346, "step": 1395 }, { "epoch": 0.025517758239347797, "grad_norm": 8.132276464199869, "learning_rate": 8.4996191926885e-06, "loss": 17.7565, "step": 1396 }, { "epoch": 0.025536037435794322, "grad_norm": 9.160219262127326, "learning_rate": 8.505712109672506e-06, "loss": 18.8931, "step": 1397 }, { "epoch": 0.025554316632240847, "grad_norm": 6.949895499620933, "learning_rate": 8.511805026656512e-06, "loss": 17.6727, "step": 1398 }, { "epoch": 0.025572595828687372, "grad_norm": 8.297904726022859, "learning_rate": 8.517897943640518e-06, "loss": 18.4404, "step": 1399 }, { "epoch": 0.025590875025133894, "grad_norm": 8.146645435656021, "learning_rate": 8.523990860624525e-06, "loss": 17.9447, "step": 1400 }, { "epoch": 0.02560915422158042, "grad_norm": 8.20198363793982, "learning_rate": 8.530083777608531e-06, "loss": 17.9231, "step": 1401 }, { "epoch": 0.025627433418026944, "grad_norm": 7.367031033065404, "learning_rate": 8.536176694592537e-06, "loss": 17.7908, "step": 1402 }, { "epoch": 0.025645712614473466, "grad_norm": 7.993296262853948, "learning_rate": 8.542269611576543e-06, "loss": 18.4607, "step": 1403 }, { "epoch": 0.02566399181091999, "grad_norm": 8.358059257126076, "learning_rate": 8.548362528560548e-06, "loss": 18.1583, "step": 1404 }, { "epoch": 0.025682271007366517, "grad_norm": 7.8085931466035445, "learning_rate": 8.554455445544554e-06, "loss": 17.9273, "step": 1405 }, { "epoch": 0.025700550203813042, "grad_norm": 8.795074595148867, "learning_rate": 8.560548362528562e-06, "loss": 18.2, "step": 1406 }, { "epoch": 0.025718829400259564, "grad_norm": 7.341380886517458, "learning_rate": 8.566641279512567e-06, "loss": 17.7604, "step": 1407 }, { "epoch": 0.02573710859670609, "grad_norm": 6.530914080059693, "learning_rate": 8.572734196496573e-06, "loss": 17.4624, "step": 1408 }, { "epoch": 0.025755387793152614, "grad_norm": 7.879802029965479, "learning_rate": 8.57882711348058e-06, "loss": 17.9349, "step": 1409 }, { "epoch": 0.025773666989599136, "grad_norm": 8.300464636465835, "learning_rate": 8.584920030464585e-06, "loss": 17.7776, "step": 1410 }, { "epoch": 0.02579194618604566, "grad_norm": 8.395711099456939, "learning_rate": 8.591012947448592e-06, "loss": 18.0982, "step": 1411 }, { "epoch": 0.025810225382492186, "grad_norm": 8.474296514569252, "learning_rate": 8.597105864432598e-06, "loss": 18.0764, "step": 1412 }, { "epoch": 0.025828504578938708, "grad_norm": 8.777423423211545, "learning_rate": 8.603198781416604e-06, "loss": 18.3759, "step": 1413 }, { "epoch": 0.025846783775385233, "grad_norm": 7.36446733094962, "learning_rate": 8.609291698400611e-06, "loss": 17.9045, "step": 1414 }, { "epoch": 0.02586506297183176, "grad_norm": 9.504182858462068, "learning_rate": 8.615384615384617e-06, "loss": 18.6237, "step": 1415 }, { "epoch": 0.025883342168278284, "grad_norm": 9.063770990698762, "learning_rate": 8.621477532368621e-06, "loss": 18.4992, "step": 1416 }, { "epoch": 0.025901621364724806, "grad_norm": 7.996481775653979, "learning_rate": 8.627570449352628e-06, "loss": 18.1038, "step": 1417 }, { "epoch": 0.02591990056117133, "grad_norm": 8.596649768941777, "learning_rate": 8.633663366336634e-06, "loss": 18.3631, "step": 1418 }, { "epoch": 0.025938179757617856, "grad_norm": 7.646225022339392, "learning_rate": 8.63975628332064e-06, "loss": 17.8257, "step": 1419 }, { "epoch": 0.025956458954064378, "grad_norm": 8.799682429765236, "learning_rate": 8.645849200304647e-06, "loss": 18.4929, "step": 1420 }, { "epoch": 0.025974738150510903, "grad_norm": 7.632664788232884, "learning_rate": 8.651942117288653e-06, "loss": 17.7875, "step": 1421 }, { "epoch": 0.02599301734695743, "grad_norm": 6.492463194039088, "learning_rate": 8.658035034272659e-06, "loss": 17.3591, "step": 1422 }, { "epoch": 0.026011296543403954, "grad_norm": 8.51316418431203, "learning_rate": 8.664127951256665e-06, "loss": 18.2339, "step": 1423 }, { "epoch": 0.026029575739850475, "grad_norm": 8.855657011481085, "learning_rate": 8.67022086824067e-06, "loss": 18.3553, "step": 1424 }, { "epoch": 0.026047854936297, "grad_norm": 6.309419290605549, "learning_rate": 8.676313785224678e-06, "loss": 17.1397, "step": 1425 }, { "epoch": 0.026066134132743526, "grad_norm": 7.753423110867713, "learning_rate": 8.682406702208684e-06, "loss": 17.7617, "step": 1426 }, { "epoch": 0.026084413329190047, "grad_norm": 7.523806497889765, "learning_rate": 8.68849961919269e-06, "loss": 17.7815, "step": 1427 }, { "epoch": 0.026102692525636573, "grad_norm": 7.607409503407267, "learning_rate": 8.694592536176695e-06, "loss": 17.8936, "step": 1428 }, { "epoch": 0.026120971722083098, "grad_norm": 6.895009237836748, "learning_rate": 8.7006854531607e-06, "loss": 17.5724, "step": 1429 }, { "epoch": 0.02613925091852962, "grad_norm": 8.873927842757839, "learning_rate": 8.706778370144707e-06, "loss": 18.2576, "step": 1430 }, { "epoch": 0.026157530114976145, "grad_norm": 7.546760724787882, "learning_rate": 8.712871287128714e-06, "loss": 17.628, "step": 1431 }, { "epoch": 0.02617580931142267, "grad_norm": 7.9874520930562705, "learning_rate": 8.71896420411272e-06, "loss": 17.8476, "step": 1432 }, { "epoch": 0.026194088507869195, "grad_norm": 8.739040378905148, "learning_rate": 8.725057121096726e-06, "loss": 18.1888, "step": 1433 }, { "epoch": 0.026212367704315717, "grad_norm": 8.135508969320531, "learning_rate": 8.731150038080733e-06, "loss": 18.3601, "step": 1434 }, { "epoch": 0.026230646900762242, "grad_norm": 7.964696704255999, "learning_rate": 8.737242955064737e-06, "loss": 18.1727, "step": 1435 }, { "epoch": 0.026248926097208768, "grad_norm": 8.904126796062632, "learning_rate": 8.743335872048745e-06, "loss": 18.2646, "step": 1436 }, { "epoch": 0.02626720529365529, "grad_norm": 8.195402872333565, "learning_rate": 8.74942878903275e-06, "loss": 18.0862, "step": 1437 }, { "epoch": 0.026285484490101815, "grad_norm": 6.965868215006295, "learning_rate": 8.755521706016756e-06, "loss": 17.6407, "step": 1438 }, { "epoch": 0.02630376368654834, "grad_norm": 7.587462863152006, "learning_rate": 8.761614623000762e-06, "loss": 17.8189, "step": 1439 }, { "epoch": 0.026322042882994865, "grad_norm": 9.130781072940874, "learning_rate": 8.76770753998477e-06, "loss": 18.4033, "step": 1440 }, { "epoch": 0.026340322079441387, "grad_norm": 6.468831354069997, "learning_rate": 8.773800456968773e-06, "loss": 17.3407, "step": 1441 }, { "epoch": 0.026358601275887912, "grad_norm": 9.156269451296055, "learning_rate": 8.77989337395278e-06, "loss": 18.4001, "step": 1442 }, { "epoch": 0.026376880472334437, "grad_norm": 7.183314914355377, "learning_rate": 8.785986290936787e-06, "loss": 17.9738, "step": 1443 }, { "epoch": 0.02639515966878096, "grad_norm": 8.599106529459707, "learning_rate": 8.792079207920792e-06, "loss": 18.1737, "step": 1444 }, { "epoch": 0.026413438865227484, "grad_norm": 7.942865142973476, "learning_rate": 8.7981721249048e-06, "loss": 18.0981, "step": 1445 }, { "epoch": 0.02643171806167401, "grad_norm": 8.542916435346985, "learning_rate": 8.804265041888805e-06, "loss": 18.4702, "step": 1446 }, { "epoch": 0.02644999725812053, "grad_norm": 7.9391169682901515, "learning_rate": 8.810357958872811e-06, "loss": 17.8375, "step": 1447 }, { "epoch": 0.026468276454567057, "grad_norm": 7.973990058925455, "learning_rate": 8.816450875856817e-06, "loss": 18.0898, "step": 1448 }, { "epoch": 0.026486555651013582, "grad_norm": 8.14941052802256, "learning_rate": 8.822543792840823e-06, "loss": 18.5443, "step": 1449 }, { "epoch": 0.026504834847460107, "grad_norm": 8.351592252513374, "learning_rate": 8.828636709824829e-06, "loss": 18.0385, "step": 1450 }, { "epoch": 0.02652311404390663, "grad_norm": 12.065678884127646, "learning_rate": 8.834729626808836e-06, "loss": 17.76, "step": 1451 }, { "epoch": 0.026541393240353154, "grad_norm": 7.990968201402576, "learning_rate": 8.840822543792842e-06, "loss": 18.057, "step": 1452 }, { "epoch": 0.02655967243679968, "grad_norm": 7.870114463708758, "learning_rate": 8.846915460776847e-06, "loss": 17.5919, "step": 1453 }, { "epoch": 0.0265779516332462, "grad_norm": 8.37421222401054, "learning_rate": 8.853008377760853e-06, "loss": 17.9004, "step": 1454 }, { "epoch": 0.026596230829692726, "grad_norm": 7.512862929896719, "learning_rate": 8.859101294744859e-06, "loss": 17.7529, "step": 1455 }, { "epoch": 0.02661451002613925, "grad_norm": 8.277450520977178, "learning_rate": 8.865194211728866e-06, "loss": 18.1617, "step": 1456 }, { "epoch": 0.026632789222585777, "grad_norm": 7.8667995056229145, "learning_rate": 8.871287128712872e-06, "loss": 17.8809, "step": 1457 }, { "epoch": 0.0266510684190323, "grad_norm": 7.613928529760875, "learning_rate": 8.877380045696878e-06, "loss": 17.5719, "step": 1458 }, { "epoch": 0.026669347615478824, "grad_norm": 8.035004828733218, "learning_rate": 8.883472962680885e-06, "loss": 18.0841, "step": 1459 }, { "epoch": 0.02668762681192535, "grad_norm": 9.677789943846687, "learning_rate": 8.88956587966489e-06, "loss": 18.8802, "step": 1460 }, { "epoch": 0.02670590600837187, "grad_norm": 9.55373517160617, "learning_rate": 8.895658796648895e-06, "loss": 18.8335, "step": 1461 }, { "epoch": 0.026724185204818396, "grad_norm": 7.892072339341589, "learning_rate": 8.901751713632903e-06, "loss": 17.7087, "step": 1462 }, { "epoch": 0.02674246440126492, "grad_norm": 7.999818442281938, "learning_rate": 8.907844630616908e-06, "loss": 18.1386, "step": 1463 }, { "epoch": 0.026760743597711443, "grad_norm": 8.75483316003142, "learning_rate": 8.913937547600914e-06, "loss": 18.1697, "step": 1464 }, { "epoch": 0.026779022794157968, "grad_norm": 7.675223165596685, "learning_rate": 8.920030464584922e-06, "loss": 17.6498, "step": 1465 }, { "epoch": 0.026797301990604493, "grad_norm": 8.578202310137655, "learning_rate": 8.926123381568926e-06, "loss": 18.1871, "step": 1466 }, { "epoch": 0.02681558118705102, "grad_norm": 8.097931941837253, "learning_rate": 8.932216298552933e-06, "loss": 17.8763, "step": 1467 }, { "epoch": 0.02683386038349754, "grad_norm": 7.425964756487236, "learning_rate": 8.938309215536939e-06, "loss": 17.6646, "step": 1468 }, { "epoch": 0.026852139579944066, "grad_norm": 7.352889034745282, "learning_rate": 8.944402132520945e-06, "loss": 17.8098, "step": 1469 }, { "epoch": 0.02687041877639059, "grad_norm": 8.094022459132429, "learning_rate": 8.950495049504952e-06, "loss": 18.0239, "step": 1470 }, { "epoch": 0.026888697972837113, "grad_norm": 7.825426674849305, "learning_rate": 8.956587966488958e-06, "loss": 17.9517, "step": 1471 }, { "epoch": 0.026906977169283638, "grad_norm": 7.934927280932549, "learning_rate": 8.962680883472964e-06, "loss": 18.3729, "step": 1472 }, { "epoch": 0.026925256365730163, "grad_norm": 9.164940864472438, "learning_rate": 8.96877380045697e-06, "loss": 18.8277, "step": 1473 }, { "epoch": 0.026943535562176688, "grad_norm": 8.608148379267885, "learning_rate": 8.974866717440975e-06, "loss": 17.7031, "step": 1474 }, { "epoch": 0.02696181475862321, "grad_norm": 7.558928448476973, "learning_rate": 8.980959634424981e-06, "loss": 17.8896, "step": 1475 }, { "epoch": 0.026980093955069735, "grad_norm": 7.3185664527139265, "learning_rate": 8.987052551408988e-06, "loss": 17.7327, "step": 1476 }, { "epoch": 0.02699837315151626, "grad_norm": 9.086940154591609, "learning_rate": 8.993145468392994e-06, "loss": 18.6602, "step": 1477 }, { "epoch": 0.027016652347962782, "grad_norm": 8.199731072144623, "learning_rate": 8.999238385377e-06, "loss": 18.0815, "step": 1478 }, { "epoch": 0.027034931544409307, "grad_norm": 8.692421826783356, "learning_rate": 9.005331302361006e-06, "loss": 18.5877, "step": 1479 }, { "epoch": 0.027053210740855833, "grad_norm": 8.5464561802531, "learning_rate": 9.011424219345011e-06, "loss": 18.249, "step": 1480 }, { "epoch": 0.027071489937302358, "grad_norm": 8.391435606391324, "learning_rate": 9.017517136329019e-06, "loss": 18.234, "step": 1481 }, { "epoch": 0.02708976913374888, "grad_norm": 8.28855798861418, "learning_rate": 9.023610053313025e-06, "loss": 18.1964, "step": 1482 }, { "epoch": 0.027108048330195405, "grad_norm": 7.522206878997334, "learning_rate": 9.02970297029703e-06, "loss": 18.1083, "step": 1483 }, { "epoch": 0.02712632752664193, "grad_norm": 7.6996569698720165, "learning_rate": 9.035795887281036e-06, "loss": 17.7058, "step": 1484 }, { "epoch": 0.027144606723088452, "grad_norm": 7.770670567376332, "learning_rate": 9.041888804265042e-06, "loss": 17.6966, "step": 1485 }, { "epoch": 0.027162885919534977, "grad_norm": 8.512304497126742, "learning_rate": 9.047981721249048e-06, "loss": 18.2586, "step": 1486 }, { "epoch": 0.027181165115981502, "grad_norm": 7.627840662620894, "learning_rate": 9.054074638233055e-06, "loss": 18.1694, "step": 1487 }, { "epoch": 0.027199444312428024, "grad_norm": 7.91230120789127, "learning_rate": 9.060167555217061e-06, "loss": 17.8939, "step": 1488 }, { "epoch": 0.02721772350887455, "grad_norm": 8.156236620458687, "learning_rate": 9.066260472201067e-06, "loss": 18.2974, "step": 1489 }, { "epoch": 0.027236002705321075, "grad_norm": 7.951072967239382, "learning_rate": 9.072353389185074e-06, "loss": 17.9947, "step": 1490 }, { "epoch": 0.0272542819017676, "grad_norm": 6.436830455155051, "learning_rate": 9.07844630616908e-06, "loss": 17.38, "step": 1491 }, { "epoch": 0.02727256109821412, "grad_norm": 8.842741546278877, "learning_rate": 9.084539223153086e-06, "loss": 18.5525, "step": 1492 }, { "epoch": 0.027290840294660647, "grad_norm": 8.541348994364565, "learning_rate": 9.090632140137091e-06, "loss": 18.2744, "step": 1493 }, { "epoch": 0.027309119491107172, "grad_norm": 7.677758366809842, "learning_rate": 9.096725057121097e-06, "loss": 17.9632, "step": 1494 }, { "epoch": 0.027327398687553694, "grad_norm": 11.031595550811504, "learning_rate": 9.102817974105103e-06, "loss": 18.1301, "step": 1495 }, { "epoch": 0.02734567788400022, "grad_norm": 7.6316264895902615, "learning_rate": 9.10891089108911e-06, "loss": 17.5932, "step": 1496 }, { "epoch": 0.027363957080446744, "grad_norm": 7.942322461509622, "learning_rate": 9.115003808073116e-06, "loss": 17.8623, "step": 1497 }, { "epoch": 0.02738223627689327, "grad_norm": 8.987789022179895, "learning_rate": 9.121096725057122e-06, "loss": 18.4788, "step": 1498 }, { "epoch": 0.02740051547333979, "grad_norm": 7.202629039212628, "learning_rate": 9.127189642041128e-06, "loss": 17.7036, "step": 1499 }, { "epoch": 0.027418794669786316, "grad_norm": 8.718231529858707, "learning_rate": 9.133282559025133e-06, "loss": 18.4281, "step": 1500 }, { "epoch": 0.02743707386623284, "grad_norm": 8.224690865957287, "learning_rate": 9.13937547600914e-06, "loss": 17.9646, "step": 1501 }, { "epoch": 0.027455353062679363, "grad_norm": 8.103325679036537, "learning_rate": 9.145468392993147e-06, "loss": 17.8065, "step": 1502 }, { "epoch": 0.02747363225912589, "grad_norm": 7.595202129353247, "learning_rate": 9.151561309977152e-06, "loss": 17.7352, "step": 1503 }, { "epoch": 0.027491911455572414, "grad_norm": 9.137774770007482, "learning_rate": 9.157654226961158e-06, "loss": 18.267, "step": 1504 }, { "epoch": 0.027510190652018936, "grad_norm": 8.226653766856433, "learning_rate": 9.163747143945164e-06, "loss": 18.1586, "step": 1505 }, { "epoch": 0.02752846984846546, "grad_norm": 8.250869806845207, "learning_rate": 9.16984006092917e-06, "loss": 18.1652, "step": 1506 }, { "epoch": 0.027546749044911986, "grad_norm": 7.819873303002308, "learning_rate": 9.175932977913177e-06, "loss": 17.9536, "step": 1507 }, { "epoch": 0.02756502824135851, "grad_norm": 7.971809249675537, "learning_rate": 9.182025894897183e-06, "loss": 17.8049, "step": 1508 }, { "epoch": 0.027583307437805033, "grad_norm": 7.868639339632311, "learning_rate": 9.188118811881189e-06, "loss": 18.0095, "step": 1509 }, { "epoch": 0.02760158663425156, "grad_norm": 8.046255160243186, "learning_rate": 9.194211728865194e-06, "loss": 18.2376, "step": 1510 }, { "epoch": 0.027619865830698084, "grad_norm": 8.19271834161202, "learning_rate": 9.2003046458492e-06, "loss": 18.1397, "step": 1511 }, { "epoch": 0.027638145027144605, "grad_norm": 7.972401688363723, "learning_rate": 9.206397562833208e-06, "loss": 18.1718, "step": 1512 }, { "epoch": 0.02765642422359113, "grad_norm": 7.806246299530137, "learning_rate": 9.212490479817213e-06, "loss": 17.9699, "step": 1513 }, { "epoch": 0.027674703420037656, "grad_norm": 6.966609041650007, "learning_rate": 9.218583396801219e-06, "loss": 17.8136, "step": 1514 }, { "epoch": 0.02769298261648418, "grad_norm": 7.395969299114421, "learning_rate": 9.224676313785227e-06, "loss": 17.9745, "step": 1515 }, { "epoch": 0.027711261812930703, "grad_norm": 8.108307050861727, "learning_rate": 9.230769230769232e-06, "loss": 18.1577, "step": 1516 }, { "epoch": 0.027729541009377228, "grad_norm": 8.441209289907926, "learning_rate": 9.236862147753236e-06, "loss": 18.3238, "step": 1517 }, { "epoch": 0.027747820205823753, "grad_norm": 7.1675044271113855, "learning_rate": 9.242955064737244e-06, "loss": 17.5438, "step": 1518 }, { "epoch": 0.027766099402270275, "grad_norm": 7.356867118981828, "learning_rate": 9.24904798172125e-06, "loss": 17.5816, "step": 1519 }, { "epoch": 0.0277843785987168, "grad_norm": 8.201443433514227, "learning_rate": 9.255140898705255e-06, "loss": 18.529, "step": 1520 }, { "epoch": 0.027802657795163326, "grad_norm": 8.642047252829247, "learning_rate": 9.261233815689263e-06, "loss": 18.0311, "step": 1521 }, { "epoch": 0.027820936991609847, "grad_norm": 9.120647593902877, "learning_rate": 9.267326732673269e-06, "loss": 18.3459, "step": 1522 }, { "epoch": 0.027839216188056373, "grad_norm": 8.22577861240296, "learning_rate": 9.273419649657274e-06, "loss": 18.0776, "step": 1523 }, { "epoch": 0.027857495384502898, "grad_norm": 8.844626158932435, "learning_rate": 9.27951256664128e-06, "loss": 18.4426, "step": 1524 }, { "epoch": 0.027875774580949423, "grad_norm": 6.779648817912882, "learning_rate": 9.285605483625286e-06, "loss": 17.3176, "step": 1525 }, { "epoch": 0.027894053777395945, "grad_norm": 8.492963085283831, "learning_rate": 9.291698400609293e-06, "loss": 18.0535, "step": 1526 }, { "epoch": 0.02791233297384247, "grad_norm": 7.247862231746354, "learning_rate": 9.297791317593299e-06, "loss": 17.7189, "step": 1527 }, { "epoch": 0.027930612170288995, "grad_norm": 9.165028186905502, "learning_rate": 9.303884234577305e-06, "loss": 18.7618, "step": 1528 }, { "epoch": 0.027948891366735517, "grad_norm": 7.957554243726399, "learning_rate": 9.30997715156131e-06, "loss": 18.3054, "step": 1529 }, { "epoch": 0.027967170563182042, "grad_norm": 8.85634231870708, "learning_rate": 9.316070068545316e-06, "loss": 18.8453, "step": 1530 }, { "epoch": 0.027985449759628567, "grad_norm": 6.072056271219344, "learning_rate": 9.322162985529322e-06, "loss": 17.3045, "step": 1531 }, { "epoch": 0.028003728956075093, "grad_norm": 7.304157703076281, "learning_rate": 9.32825590251333e-06, "loss": 17.8859, "step": 1532 }, { "epoch": 0.028022008152521614, "grad_norm": 8.566346981570362, "learning_rate": 9.334348819497335e-06, "loss": 18.2739, "step": 1533 }, { "epoch": 0.02804028734896814, "grad_norm": 8.863402700936351, "learning_rate": 9.340441736481341e-06, "loss": 18.1267, "step": 1534 }, { "epoch": 0.028058566545414665, "grad_norm": 7.032337434310726, "learning_rate": 9.346534653465348e-06, "loss": 17.5745, "step": 1535 }, { "epoch": 0.028076845741861187, "grad_norm": 7.905856887685362, "learning_rate": 9.352627570449353e-06, "loss": 18.1291, "step": 1536 }, { "epoch": 0.028095124938307712, "grad_norm": 6.83724131858278, "learning_rate": 9.35872048743336e-06, "loss": 17.6759, "step": 1537 }, { "epoch": 0.028113404134754237, "grad_norm": 7.371139363257559, "learning_rate": 9.364813404417366e-06, "loss": 17.6574, "step": 1538 }, { "epoch": 0.02813168333120076, "grad_norm": 7.556429626574926, "learning_rate": 9.370906321401372e-06, "loss": 17.9372, "step": 1539 }, { "epoch": 0.028149962527647284, "grad_norm": 8.229899918476479, "learning_rate": 9.376999238385377e-06, "loss": 18.3724, "step": 1540 }, { "epoch": 0.02816824172409381, "grad_norm": 6.980564490492054, "learning_rate": 9.383092155369385e-06, "loss": 17.4149, "step": 1541 }, { "epoch": 0.028186520920540335, "grad_norm": 8.549941551251717, "learning_rate": 9.389185072353389e-06, "loss": 18.3236, "step": 1542 }, { "epoch": 0.028204800116986856, "grad_norm": 8.117965248196436, "learning_rate": 9.395277989337396e-06, "loss": 18.2625, "step": 1543 }, { "epoch": 0.02822307931343338, "grad_norm": 8.087251903485502, "learning_rate": 9.401370906321402e-06, "loss": 17.9754, "step": 1544 }, { "epoch": 0.028241358509879907, "grad_norm": 8.109262754282136, "learning_rate": 9.407463823305408e-06, "loss": 17.9735, "step": 1545 }, { "epoch": 0.02825963770632643, "grad_norm": 8.449701368675408, "learning_rate": 9.413556740289415e-06, "loss": 18.3141, "step": 1546 }, { "epoch": 0.028277916902772954, "grad_norm": 8.021681692913548, "learning_rate": 9.419649657273421e-06, "loss": 17.7494, "step": 1547 }, { "epoch": 0.02829619609921948, "grad_norm": 8.654610764330709, "learning_rate": 9.425742574257427e-06, "loss": 18.3164, "step": 1548 }, { "epoch": 0.028314475295666004, "grad_norm": 8.934826474472013, "learning_rate": 9.431835491241433e-06, "loss": 18.5265, "step": 1549 }, { "epoch": 0.028332754492112526, "grad_norm": 6.615085962692111, "learning_rate": 9.437928408225438e-06, "loss": 17.4637, "step": 1550 }, { "epoch": 0.02835103368855905, "grad_norm": 7.193084585584398, "learning_rate": 9.444021325209444e-06, "loss": 17.8196, "step": 1551 }, { "epoch": 0.028369312885005576, "grad_norm": 7.5635242082002705, "learning_rate": 9.450114242193451e-06, "loss": 17.8508, "step": 1552 }, { "epoch": 0.028387592081452098, "grad_norm": 8.727849038197348, "learning_rate": 9.456207159177457e-06, "loss": 17.8505, "step": 1553 }, { "epoch": 0.028405871277898623, "grad_norm": 8.546991746139607, "learning_rate": 9.462300076161463e-06, "loss": 18.2066, "step": 1554 }, { "epoch": 0.02842415047434515, "grad_norm": 8.434184761377928, "learning_rate": 9.468392993145469e-06, "loss": 18.3708, "step": 1555 }, { "epoch": 0.02844242967079167, "grad_norm": 7.600295088603328, "learning_rate": 9.474485910129475e-06, "loss": 17.712, "step": 1556 }, { "epoch": 0.028460708867238196, "grad_norm": 8.812810635554333, "learning_rate": 9.480578827113482e-06, "loss": 18.6113, "step": 1557 }, { "epoch": 0.02847898806368472, "grad_norm": 7.1772233524126765, "learning_rate": 9.486671744097488e-06, "loss": 17.3414, "step": 1558 }, { "epoch": 0.028497267260131246, "grad_norm": 8.008227503749223, "learning_rate": 9.492764661081493e-06, "loss": 17.7675, "step": 1559 }, { "epoch": 0.028515546456577768, "grad_norm": 7.615853746598634, "learning_rate": 9.498857578065501e-06, "loss": 17.6669, "step": 1560 }, { "epoch": 0.028533825653024293, "grad_norm": 9.332984012169115, "learning_rate": 9.504950495049505e-06, "loss": 18.7201, "step": 1561 }, { "epoch": 0.02855210484947082, "grad_norm": 6.923109174353397, "learning_rate": 9.51104341203351e-06, "loss": 17.9097, "step": 1562 }, { "epoch": 0.02857038404591734, "grad_norm": 7.931663485509248, "learning_rate": 9.517136329017518e-06, "loss": 17.7847, "step": 1563 }, { "epoch": 0.028588663242363865, "grad_norm": 7.60567237482634, "learning_rate": 9.523229246001524e-06, "loss": 17.9421, "step": 1564 }, { "epoch": 0.02860694243881039, "grad_norm": 7.780004874882818, "learning_rate": 9.52932216298553e-06, "loss": 17.8349, "step": 1565 }, { "epoch": 0.028625221635256916, "grad_norm": 7.848484706525409, "learning_rate": 9.535415079969537e-06, "loss": 17.9837, "step": 1566 }, { "epoch": 0.028643500831703438, "grad_norm": 9.128305637632943, "learning_rate": 9.541507996953541e-06, "loss": 18.6661, "step": 1567 }, { "epoch": 0.028661780028149963, "grad_norm": 8.250098244391708, "learning_rate": 9.547600913937549e-06, "loss": 18.1463, "step": 1568 }, { "epoch": 0.028680059224596488, "grad_norm": 7.282911330342466, "learning_rate": 9.553693830921554e-06, "loss": 17.9766, "step": 1569 }, { "epoch": 0.02869833842104301, "grad_norm": 8.728107028323366, "learning_rate": 9.55978674790556e-06, "loss": 18.4801, "step": 1570 }, { "epoch": 0.028716617617489535, "grad_norm": 7.322854026891061, "learning_rate": 9.565879664889568e-06, "loss": 17.4926, "step": 1571 }, { "epoch": 0.02873489681393606, "grad_norm": 9.024688004243888, "learning_rate": 9.571972581873573e-06, "loss": 18.507, "step": 1572 }, { "epoch": 0.028753176010382582, "grad_norm": 8.72362500062873, "learning_rate": 9.578065498857577e-06, "loss": 18.3873, "step": 1573 }, { "epoch": 0.028771455206829107, "grad_norm": 7.608476714455864, "learning_rate": 9.584158415841585e-06, "loss": 17.8651, "step": 1574 }, { "epoch": 0.028789734403275632, "grad_norm": 7.4612096250296105, "learning_rate": 9.59025133282559e-06, "loss": 17.8404, "step": 1575 }, { "epoch": 0.028808013599722158, "grad_norm": 7.472152034411249, "learning_rate": 9.596344249809596e-06, "loss": 17.8312, "step": 1576 }, { "epoch": 0.02882629279616868, "grad_norm": 8.903692153904508, "learning_rate": 9.602437166793604e-06, "loss": 18.5229, "step": 1577 }, { "epoch": 0.028844571992615205, "grad_norm": 7.796879083278068, "learning_rate": 9.60853008377761e-06, "loss": 17.9882, "step": 1578 }, { "epoch": 0.02886285118906173, "grad_norm": 7.756058097712065, "learning_rate": 9.614623000761615e-06, "loss": 17.9424, "step": 1579 }, { "epoch": 0.02888113038550825, "grad_norm": 9.72247257932452, "learning_rate": 9.620715917745621e-06, "loss": 19.3409, "step": 1580 }, { "epoch": 0.028899409581954777, "grad_norm": 7.420575822299361, "learning_rate": 9.626808834729627e-06, "loss": 17.5876, "step": 1581 }, { "epoch": 0.028917688778401302, "grad_norm": 8.134881444053162, "learning_rate": 9.632901751713634e-06, "loss": 17.9443, "step": 1582 }, { "epoch": 0.028935967974847827, "grad_norm": 8.170014718000967, "learning_rate": 9.63899466869764e-06, "loss": 18.1135, "step": 1583 }, { "epoch": 0.02895424717129435, "grad_norm": 7.727004414989496, "learning_rate": 9.645087585681646e-06, "loss": 18.0018, "step": 1584 }, { "epoch": 0.028972526367740874, "grad_norm": 8.724147523267623, "learning_rate": 9.651180502665652e-06, "loss": 18.3142, "step": 1585 }, { "epoch": 0.0289908055641874, "grad_norm": 6.320770195610164, "learning_rate": 9.657273419649657e-06, "loss": 17.0664, "step": 1586 }, { "epoch": 0.02900908476063392, "grad_norm": 7.725570724377613, "learning_rate": 9.663366336633663e-06, "loss": 18.0783, "step": 1587 }, { "epoch": 0.029027363957080447, "grad_norm": 7.694211438039342, "learning_rate": 9.66945925361767e-06, "loss": 17.6469, "step": 1588 }, { "epoch": 0.029045643153526972, "grad_norm": 7.70476316150784, "learning_rate": 9.675552170601676e-06, "loss": 17.7812, "step": 1589 }, { "epoch": 0.029063922349973494, "grad_norm": 8.563173823510006, "learning_rate": 9.681645087585682e-06, "loss": 18.2982, "step": 1590 }, { "epoch": 0.02908220154642002, "grad_norm": 7.4474122934320635, "learning_rate": 9.68773800456969e-06, "loss": 17.896, "step": 1591 }, { "epoch": 0.029100480742866544, "grad_norm": 8.433496131474252, "learning_rate": 9.693830921553694e-06, "loss": 18.0938, "step": 1592 }, { "epoch": 0.02911875993931307, "grad_norm": 7.952162694222141, "learning_rate": 9.699923838537701e-06, "loss": 17.8548, "step": 1593 }, { "epoch": 0.02913703913575959, "grad_norm": 9.547958041113377, "learning_rate": 9.706016755521707e-06, "loss": 18.7101, "step": 1594 }, { "epoch": 0.029155318332206116, "grad_norm": 7.35777251789467, "learning_rate": 9.712109672505713e-06, "loss": 17.6883, "step": 1595 }, { "epoch": 0.02917359752865264, "grad_norm": 8.204778394839916, "learning_rate": 9.718202589489718e-06, "loss": 18.4319, "step": 1596 }, { "epoch": 0.029191876725099163, "grad_norm": 8.288620463582449, "learning_rate": 9.724295506473726e-06, "loss": 18.2228, "step": 1597 }, { "epoch": 0.02921015592154569, "grad_norm": 7.888180051404007, "learning_rate": 9.730388423457732e-06, "loss": 17.9801, "step": 1598 }, { "epoch": 0.029228435117992214, "grad_norm": 7.935515095892514, "learning_rate": 9.736481340441737e-06, "loss": 18.1441, "step": 1599 }, { "epoch": 0.02924671431443874, "grad_norm": 6.5810518907819615, "learning_rate": 9.742574257425743e-06, "loss": 17.487, "step": 1600 }, { "epoch": 0.02926499351088526, "grad_norm": 8.057191057617796, "learning_rate": 9.748667174409749e-06, "loss": 18.5901, "step": 1601 }, { "epoch": 0.029283272707331786, "grad_norm": 7.454482283214667, "learning_rate": 9.754760091393756e-06, "loss": 17.7255, "step": 1602 }, { "epoch": 0.02930155190377831, "grad_norm": 7.1869910327073825, "learning_rate": 9.760853008377762e-06, "loss": 18.0372, "step": 1603 }, { "epoch": 0.029319831100224833, "grad_norm": 8.727975453617983, "learning_rate": 9.766945925361768e-06, "loss": 18.4149, "step": 1604 }, { "epoch": 0.029338110296671358, "grad_norm": 7.777877274694171, "learning_rate": 9.773038842345774e-06, "loss": 17.7231, "step": 1605 }, { "epoch": 0.029356389493117883, "grad_norm": 7.513074478390638, "learning_rate": 9.77913175932978e-06, "loss": 17.436, "step": 1606 }, { "epoch": 0.029374668689564405, "grad_norm": 7.792920525633787, "learning_rate": 9.785224676313785e-06, "loss": 18.1177, "step": 1607 }, { "epoch": 0.02939294788601093, "grad_norm": 6.898953111016352, "learning_rate": 9.791317593297793e-06, "loss": 17.8014, "step": 1608 }, { "epoch": 0.029411227082457456, "grad_norm": 8.330048553409595, "learning_rate": 9.797410510281798e-06, "loss": 18.1343, "step": 1609 }, { "epoch": 0.02942950627890398, "grad_norm": 8.156161288497497, "learning_rate": 9.803503427265804e-06, "loss": 18.1073, "step": 1610 }, { "epoch": 0.029447785475350503, "grad_norm": 7.707355244288337, "learning_rate": 9.80959634424981e-06, "loss": 17.8602, "step": 1611 }, { "epoch": 0.029466064671797028, "grad_norm": 6.982593291457571, "learning_rate": 9.815689261233816e-06, "loss": 17.3715, "step": 1612 }, { "epoch": 0.029484343868243553, "grad_norm": 7.6136798692364644, "learning_rate": 9.821782178217823e-06, "loss": 17.6577, "step": 1613 }, { "epoch": 0.029502623064690075, "grad_norm": 8.831629567786118, "learning_rate": 9.827875095201829e-06, "loss": 18.5191, "step": 1614 }, { "epoch": 0.0295209022611366, "grad_norm": 8.241696213267609, "learning_rate": 9.833968012185835e-06, "loss": 17.9193, "step": 1615 }, { "epoch": 0.029539181457583125, "grad_norm": 8.513703873129042, "learning_rate": 9.840060929169842e-06, "loss": 18.2604, "step": 1616 }, { "epoch": 0.02955746065402965, "grad_norm": 8.166623481874485, "learning_rate": 9.846153846153848e-06, "loss": 18.3146, "step": 1617 }, { "epoch": 0.029575739850476172, "grad_norm": 9.591123009193785, "learning_rate": 9.852246763137852e-06, "loss": 18.0625, "step": 1618 }, { "epoch": 0.029594019046922698, "grad_norm": 7.342378307838155, "learning_rate": 9.85833968012186e-06, "loss": 17.7567, "step": 1619 }, { "epoch": 0.029612298243369223, "grad_norm": 7.711579400603312, "learning_rate": 9.864432597105865e-06, "loss": 17.5905, "step": 1620 }, { "epoch": 0.029630577439815745, "grad_norm": 8.719640722806627, "learning_rate": 9.87052551408987e-06, "loss": 18.6454, "step": 1621 }, { "epoch": 0.02964885663626227, "grad_norm": 8.310001366312644, "learning_rate": 9.876618431073878e-06, "loss": 18.2321, "step": 1622 }, { "epoch": 0.029667135832708795, "grad_norm": 8.298224474526126, "learning_rate": 9.882711348057884e-06, "loss": 18.2345, "step": 1623 }, { "epoch": 0.029685415029155317, "grad_norm": 7.868201262455768, "learning_rate": 9.88880426504189e-06, "loss": 18.2417, "step": 1624 }, { "epoch": 0.029703694225601842, "grad_norm": 7.868651271556509, "learning_rate": 9.894897182025896e-06, "loss": 18.1694, "step": 1625 }, { "epoch": 0.029721973422048367, "grad_norm": 7.213672615749309, "learning_rate": 9.900990099009901e-06, "loss": 17.8703, "step": 1626 }, { "epoch": 0.029740252618494892, "grad_norm": 9.445225101439867, "learning_rate": 9.907083015993909e-06, "loss": 18.9733, "step": 1627 }, { "epoch": 0.029758531814941414, "grad_norm": 8.085466053201765, "learning_rate": 9.913175932977915e-06, "loss": 18.1514, "step": 1628 }, { "epoch": 0.02977681101138794, "grad_norm": 8.670797616389512, "learning_rate": 9.91926884996192e-06, "loss": 18.1964, "step": 1629 }, { "epoch": 0.029795090207834465, "grad_norm": 9.299323756435573, "learning_rate": 9.925361766945926e-06, "loss": 18.3876, "step": 1630 }, { "epoch": 0.029813369404280986, "grad_norm": 7.965960012792072, "learning_rate": 9.931454683929932e-06, "loss": 17.6732, "step": 1631 }, { "epoch": 0.02983164860072751, "grad_norm": 8.34160354865226, "learning_rate": 9.937547600913938e-06, "loss": 18.0594, "step": 1632 }, { "epoch": 0.029849927797174037, "grad_norm": 8.703633671131135, "learning_rate": 9.943640517897945e-06, "loss": 18.2577, "step": 1633 }, { "epoch": 0.029868206993620562, "grad_norm": 8.138745042647287, "learning_rate": 9.94973343488195e-06, "loss": 17.9059, "step": 1634 }, { "epoch": 0.029886486190067084, "grad_norm": 6.956669647829765, "learning_rate": 9.955826351865957e-06, "loss": 17.4469, "step": 1635 }, { "epoch": 0.02990476538651361, "grad_norm": 7.462595981410311, "learning_rate": 9.961919268849962e-06, "loss": 17.7349, "step": 1636 }, { "epoch": 0.029923044582960134, "grad_norm": 7.365790430570946, "learning_rate": 9.968012185833968e-06, "loss": 17.743, "step": 1637 }, { "epoch": 0.029941323779406656, "grad_norm": 7.9050808159133, "learning_rate": 9.974105102817975e-06, "loss": 17.8508, "step": 1638 }, { "epoch": 0.02995960297585318, "grad_norm": 7.08010023731484, "learning_rate": 9.980198019801981e-06, "loss": 17.6474, "step": 1639 }, { "epoch": 0.029977882172299707, "grad_norm": 7.241082228675219, "learning_rate": 9.986290936785987e-06, "loss": 17.5394, "step": 1640 }, { "epoch": 0.02999616136874623, "grad_norm": 6.70050213401771, "learning_rate": 9.992383853769993e-06, "loss": 17.5436, "step": 1641 }, { "epoch": 0.030014440565192754, "grad_norm": 8.603058800151555, "learning_rate": 9.998476770754e-06, "loss": 18.2784, "step": 1642 }, { "epoch": 0.03003271976163928, "grad_norm": 8.16200725958428, "learning_rate": 9.999999995071216e-06, "loss": 18.1774, "step": 1643 }, { "epoch": 0.030050998958085804, "grad_norm": 8.77092898355948, "learning_rate": 9.99999997316551e-06, "loss": 18.3002, "step": 1644 }, { "epoch": 0.030069278154532326, "grad_norm": 7.532952323927857, "learning_rate": 9.999999933735236e-06, "loss": 17.752, "step": 1645 }, { "epoch": 0.03008755735097885, "grad_norm": 7.604105027865724, "learning_rate": 9.999999876780395e-06, "loss": 17.7217, "step": 1646 }, { "epoch": 0.030105836547425376, "grad_norm": 8.217025734469807, "learning_rate": 9.99999980230099e-06, "loss": 18.2265, "step": 1647 }, { "epoch": 0.030124115743871898, "grad_norm": 7.19985957568226, "learning_rate": 9.99999971029702e-06, "loss": 17.6727, "step": 1648 }, { "epoch": 0.030142394940318423, "grad_norm": 7.4626368055052055, "learning_rate": 9.999999600768484e-06, "loss": 17.6681, "step": 1649 }, { "epoch": 0.03016067413676495, "grad_norm": 8.597444246697309, "learning_rate": 9.999999473715385e-06, "loss": 18.2233, "step": 1650 }, { "epoch": 0.030178953333211474, "grad_norm": 7.339821522451504, "learning_rate": 9.99999932913772e-06, "loss": 18.012, "step": 1651 }, { "epoch": 0.030197232529657995, "grad_norm": 9.495837231113391, "learning_rate": 9.99999916703549e-06, "loss": 18.7163, "step": 1652 }, { "epoch": 0.03021551172610452, "grad_norm": 8.153424924313496, "learning_rate": 9.999998987408699e-06, "loss": 18.1528, "step": 1653 }, { "epoch": 0.030233790922551046, "grad_norm": 7.4629859005548305, "learning_rate": 9.999998790257344e-06, "loss": 17.9068, "step": 1654 }, { "epoch": 0.030252070118997568, "grad_norm": 7.5700990178487695, "learning_rate": 9.99999857558143e-06, "loss": 17.6228, "step": 1655 }, { "epoch": 0.030270349315444093, "grad_norm": 8.497903245407256, "learning_rate": 9.99999834338095e-06, "loss": 18.1078, "step": 1656 }, { "epoch": 0.030288628511890618, "grad_norm": 8.217118353802384, "learning_rate": 9.999998093655913e-06, "loss": 18.2546, "step": 1657 }, { "epoch": 0.03030690770833714, "grad_norm": 7.85525980711628, "learning_rate": 9.999997826406315e-06, "loss": 17.9371, "step": 1658 }, { "epoch": 0.030325186904783665, "grad_norm": 7.506971097466327, "learning_rate": 9.99999754163216e-06, "loss": 17.9911, "step": 1659 }, { "epoch": 0.03034346610123019, "grad_norm": 7.483869241789261, "learning_rate": 9.999997239333448e-06, "loss": 17.6404, "step": 1660 }, { "epoch": 0.030361745297676716, "grad_norm": 7.8702369812607955, "learning_rate": 9.999996919510177e-06, "loss": 18.014, "step": 1661 }, { "epoch": 0.030380024494123237, "grad_norm": 7.478131951125737, "learning_rate": 9.999996582162353e-06, "loss": 17.9119, "step": 1662 }, { "epoch": 0.030398303690569763, "grad_norm": 6.891453312552426, "learning_rate": 9.999996227289975e-06, "loss": 17.5924, "step": 1663 }, { "epoch": 0.030416582887016288, "grad_norm": 7.649411616217506, "learning_rate": 9.999995854893042e-06, "loss": 17.8496, "step": 1664 }, { "epoch": 0.03043486208346281, "grad_norm": 7.687138540451615, "learning_rate": 9.999995464971559e-06, "loss": 18.0789, "step": 1665 }, { "epoch": 0.030453141279909335, "grad_norm": 6.754059755238239, "learning_rate": 9.999995057525525e-06, "loss": 17.5271, "step": 1666 }, { "epoch": 0.03047142047635586, "grad_norm": 9.442846909744565, "learning_rate": 9.999994632554943e-06, "loss": 18.5949, "step": 1667 }, { "epoch": 0.030489699672802385, "grad_norm": 8.563193819594666, "learning_rate": 9.999994190059814e-06, "loss": 18.1301, "step": 1668 }, { "epoch": 0.030507978869248907, "grad_norm": 8.829640791058583, "learning_rate": 9.999993730040137e-06, "loss": 17.6752, "step": 1669 }, { "epoch": 0.030526258065695432, "grad_norm": 7.8977755747954035, "learning_rate": 9.999993252495917e-06, "loss": 18.0845, "step": 1670 }, { "epoch": 0.030544537262141958, "grad_norm": 6.729064744656194, "learning_rate": 9.999992757427155e-06, "loss": 17.4574, "step": 1671 }, { "epoch": 0.03056281645858848, "grad_norm": 6.678060278276319, "learning_rate": 9.999992244833852e-06, "loss": 17.4589, "step": 1672 }, { "epoch": 0.030581095655035005, "grad_norm": 7.737125905516272, "learning_rate": 9.99999171471601e-06, "loss": 17.8177, "step": 1673 }, { "epoch": 0.03059937485148153, "grad_norm": 7.657764374448887, "learning_rate": 9.999991167073632e-06, "loss": 18.2387, "step": 1674 }, { "epoch": 0.03061765404792805, "grad_norm": 8.346199749612207, "learning_rate": 9.999990601906717e-06, "loss": 18.0601, "step": 1675 }, { "epoch": 0.030635933244374577, "grad_norm": 6.530886845125346, "learning_rate": 9.999990019215271e-06, "loss": 17.3699, "step": 1676 }, { "epoch": 0.030654212440821102, "grad_norm": 8.763388542357818, "learning_rate": 9.999989418999292e-06, "loss": 18.0928, "step": 1677 }, { "epoch": 0.030672491637267627, "grad_norm": 8.045196888267036, "learning_rate": 9.999988801258785e-06, "loss": 17.999, "step": 1678 }, { "epoch": 0.03069077083371415, "grad_norm": 9.041537569253046, "learning_rate": 9.999988165993751e-06, "loss": 18.5063, "step": 1679 }, { "epoch": 0.030709050030160674, "grad_norm": 8.317873113243014, "learning_rate": 9.999987513204192e-06, "loss": 18.4539, "step": 1680 }, { "epoch": 0.0307273292266072, "grad_norm": 7.850982443122049, "learning_rate": 9.99998684289011e-06, "loss": 18.1377, "step": 1681 }, { "epoch": 0.03074560842305372, "grad_norm": 8.09374934043548, "learning_rate": 9.999986155051508e-06, "loss": 17.9622, "step": 1682 }, { "epoch": 0.030763887619500246, "grad_norm": 8.2254588213774, "learning_rate": 9.99998544968839e-06, "loss": 18.2687, "step": 1683 }, { "epoch": 0.03078216681594677, "grad_norm": 7.751994688589054, "learning_rate": 9.999984726800756e-06, "loss": 18.1268, "step": 1684 }, { "epoch": 0.030800446012393297, "grad_norm": 8.142840854061875, "learning_rate": 9.99998398638861e-06, "loss": 18.0481, "step": 1685 }, { "epoch": 0.03081872520883982, "grad_norm": 7.412709744013709, "learning_rate": 9.999983228451953e-06, "loss": 17.6665, "step": 1686 }, { "epoch": 0.030837004405286344, "grad_norm": 8.274057649578133, "learning_rate": 9.999982452990789e-06, "loss": 18.1678, "step": 1687 }, { "epoch": 0.03085528360173287, "grad_norm": 7.733422141237051, "learning_rate": 9.99998166000512e-06, "loss": 18.0693, "step": 1688 }, { "epoch": 0.03087356279817939, "grad_norm": 6.331266037416006, "learning_rate": 9.99998084949495e-06, "loss": 17.3541, "step": 1689 }, { "epoch": 0.030891841994625916, "grad_norm": 8.981301420778157, "learning_rate": 9.99998002146028e-06, "loss": 18.3912, "step": 1690 }, { "epoch": 0.03091012119107244, "grad_norm": 8.480155189671931, "learning_rate": 9.999979175901116e-06, "loss": 18.6498, "step": 1691 }, { "epoch": 0.030928400387518963, "grad_norm": 8.869112873111437, "learning_rate": 9.999978312817455e-06, "loss": 18.1799, "step": 1692 }, { "epoch": 0.03094667958396549, "grad_norm": 7.102599203870701, "learning_rate": 9.999977432209306e-06, "loss": 17.654, "step": 1693 }, { "epoch": 0.030964958780412014, "grad_norm": 7.914076704773177, "learning_rate": 9.999976534076672e-06, "loss": 17.7237, "step": 1694 }, { "epoch": 0.03098323797685854, "grad_norm": 7.402304949605478, "learning_rate": 9.999975618419553e-06, "loss": 17.7698, "step": 1695 }, { "epoch": 0.03100151717330506, "grad_norm": 7.1955102355883565, "learning_rate": 9.999974685237951e-06, "loss": 17.6303, "step": 1696 }, { "epoch": 0.031019796369751586, "grad_norm": 7.944128019819306, "learning_rate": 9.999973734531873e-06, "loss": 17.8963, "step": 1697 }, { "epoch": 0.03103807556619811, "grad_norm": 7.73052692176272, "learning_rate": 9.999972766301323e-06, "loss": 18.0474, "step": 1698 }, { "epoch": 0.031056354762644633, "grad_norm": 8.060643549825397, "learning_rate": 9.9999717805463e-06, "loss": 18.2613, "step": 1699 }, { "epoch": 0.031074633959091158, "grad_norm": 6.903036766193613, "learning_rate": 9.99997077726681e-06, "loss": 17.5884, "step": 1700 }, { "epoch": 0.031092913155537683, "grad_norm": 7.937945436907774, "learning_rate": 9.999969756462858e-06, "loss": 18.2496, "step": 1701 }, { "epoch": 0.03111119235198421, "grad_norm": 8.85048821164819, "learning_rate": 9.999968718134443e-06, "loss": 18.4589, "step": 1702 }, { "epoch": 0.03112947154843073, "grad_norm": 6.437826666373901, "learning_rate": 9.999967662281574e-06, "loss": 17.6258, "step": 1703 }, { "epoch": 0.031147750744877255, "grad_norm": 8.149168186932878, "learning_rate": 9.99996658890425e-06, "loss": 18.0005, "step": 1704 }, { "epoch": 0.03116602994132378, "grad_norm": 8.46177987736004, "learning_rate": 9.99996549800248e-06, "loss": 18.4022, "step": 1705 }, { "epoch": 0.031184309137770302, "grad_norm": 7.1207036582000365, "learning_rate": 9.999964389576262e-06, "loss": 17.7006, "step": 1706 }, { "epoch": 0.031202588334216828, "grad_norm": 8.74135307600333, "learning_rate": 9.999963263625604e-06, "loss": 18.1906, "step": 1707 }, { "epoch": 0.031220867530663353, "grad_norm": 7.094062218917512, "learning_rate": 9.999962120150507e-06, "loss": 17.5309, "step": 1708 }, { "epoch": 0.031239146727109875, "grad_norm": 6.13369738189881, "learning_rate": 9.99996095915098e-06, "loss": 17.1825, "step": 1709 }, { "epoch": 0.0312574259235564, "grad_norm": 8.334653328554893, "learning_rate": 9.999959780627021e-06, "loss": 18.2506, "step": 1710 }, { "epoch": 0.031275705120002925, "grad_norm": 7.9067533539838815, "learning_rate": 9.999958584578638e-06, "loss": 18.3141, "step": 1711 }, { "epoch": 0.03129398431644945, "grad_norm": 7.669650189051857, "learning_rate": 9.999957371005833e-06, "loss": 17.7663, "step": 1712 }, { "epoch": 0.031312263512895976, "grad_norm": 6.893918883243496, "learning_rate": 9.999956139908613e-06, "loss": 17.5321, "step": 1713 }, { "epoch": 0.0313305427093425, "grad_norm": 8.880750021766438, "learning_rate": 9.999954891286978e-06, "loss": 18.2603, "step": 1714 }, { "epoch": 0.03134882190578902, "grad_norm": 7.745527335498254, "learning_rate": 9.999953625140938e-06, "loss": 18.1199, "step": 1715 }, { "epoch": 0.03136710110223555, "grad_norm": 8.629927372918612, "learning_rate": 9.999952341470492e-06, "loss": 18.442, "step": 1716 }, { "epoch": 0.03138538029868207, "grad_norm": 9.315829947449322, "learning_rate": 9.999951040275648e-06, "loss": 18.4559, "step": 1717 }, { "epoch": 0.03140365949512859, "grad_norm": 8.011886242632372, "learning_rate": 9.99994972155641e-06, "loss": 17.8989, "step": 1718 }, { "epoch": 0.03142193869157512, "grad_norm": 8.688456567864376, "learning_rate": 9.99994838531278e-06, "loss": 17.9268, "step": 1719 }, { "epoch": 0.03144021788802164, "grad_norm": 7.863237480162188, "learning_rate": 9.999947031544768e-06, "loss": 18.0234, "step": 1720 }, { "epoch": 0.031458497084468164, "grad_norm": 9.614738708606128, "learning_rate": 9.999945660252372e-06, "loss": 19.0878, "step": 1721 }, { "epoch": 0.03147677628091469, "grad_norm": 7.484839742830578, "learning_rate": 9.999944271435604e-06, "loss": 17.7975, "step": 1722 }, { "epoch": 0.031495055477361214, "grad_norm": 8.302144392083973, "learning_rate": 9.999942865094463e-06, "loss": 18.1382, "step": 1723 }, { "epoch": 0.03151333467380774, "grad_norm": 8.800750917473952, "learning_rate": 9.999941441228955e-06, "loss": 18.5514, "step": 1724 }, { "epoch": 0.031531613870254264, "grad_norm": 8.085960134063056, "learning_rate": 9.999939999839087e-06, "loss": 18.0244, "step": 1725 }, { "epoch": 0.031549893066700786, "grad_norm": 8.970989611370843, "learning_rate": 9.999938540924865e-06, "loss": 18.4848, "step": 1726 }, { "epoch": 0.031568172263147315, "grad_norm": 13.206975008573506, "learning_rate": 9.999937064486292e-06, "loss": 18.5302, "step": 1727 }, { "epoch": 0.03158645145959384, "grad_norm": 7.733792813452686, "learning_rate": 9.999935570523371e-06, "loss": 17.971, "step": 1728 }, { "epoch": 0.03160473065604036, "grad_norm": 8.211191887685656, "learning_rate": 9.999934059036111e-06, "loss": 17.9593, "step": 1729 }, { "epoch": 0.03162300985248689, "grad_norm": 7.575399705091664, "learning_rate": 9.999932530024517e-06, "loss": 18.111, "step": 1730 }, { "epoch": 0.03164128904893341, "grad_norm": 7.464121770275439, "learning_rate": 9.999930983488592e-06, "loss": 17.9209, "step": 1731 }, { "epoch": 0.03165956824537993, "grad_norm": 7.272034710638791, "learning_rate": 9.999929419428345e-06, "loss": 17.7786, "step": 1732 }, { "epoch": 0.03167784744182646, "grad_norm": 8.457109924726934, "learning_rate": 9.999927837843778e-06, "loss": 18.276, "step": 1733 }, { "epoch": 0.03169612663827298, "grad_norm": 9.83487092928988, "learning_rate": 9.999926238734896e-06, "loss": 18.1567, "step": 1734 }, { "epoch": 0.0317144058347195, "grad_norm": 7.058737498003848, "learning_rate": 9.999924622101708e-06, "loss": 17.4511, "step": 1735 }, { "epoch": 0.03173268503116603, "grad_norm": 17.665633839089814, "learning_rate": 9.999922987944218e-06, "loss": 18.6444, "step": 1736 }, { "epoch": 0.03175096422761255, "grad_norm": 7.935431722583716, "learning_rate": 9.999921336262432e-06, "loss": 17.7928, "step": 1737 }, { "epoch": 0.031769243424059075, "grad_norm": 7.7888607207513205, "learning_rate": 9.999919667056355e-06, "loss": 18.1576, "step": 1738 }, { "epoch": 0.031787522620505604, "grad_norm": 8.223768512472157, "learning_rate": 9.999917980325993e-06, "loss": 18.2164, "step": 1739 }, { "epoch": 0.031805801816952126, "grad_norm": 7.552464065738869, "learning_rate": 9.999916276071352e-06, "loss": 17.7676, "step": 1740 }, { "epoch": 0.031824081013398654, "grad_norm": 7.752283479585389, "learning_rate": 9.99991455429244e-06, "loss": 17.7247, "step": 1741 }, { "epoch": 0.031842360209845176, "grad_norm": 8.32808808108957, "learning_rate": 9.99991281498926e-06, "loss": 18.2492, "step": 1742 }, { "epoch": 0.0318606394062917, "grad_norm": 7.311715080136691, "learning_rate": 9.999911058161821e-06, "loss": 17.6998, "step": 1743 }, { "epoch": 0.031878918602738227, "grad_norm": 7.809550576897194, "learning_rate": 9.999909283810127e-06, "loss": 17.8632, "step": 1744 }, { "epoch": 0.03189719779918475, "grad_norm": 8.643321746507269, "learning_rate": 9.999907491934184e-06, "loss": 18.3235, "step": 1745 }, { "epoch": 0.03191547699563127, "grad_norm": 8.69193791200664, "learning_rate": 9.999905682534002e-06, "loss": 18.1556, "step": 1746 }, { "epoch": 0.0319337561920778, "grad_norm": 7.41714272691651, "learning_rate": 9.99990385560958e-06, "loss": 17.4267, "step": 1747 }, { "epoch": 0.03195203538852432, "grad_norm": 7.647497052491643, "learning_rate": 9.99990201116093e-06, "loss": 17.8335, "step": 1748 }, { "epoch": 0.03197031458497084, "grad_norm": 8.250167251223719, "learning_rate": 9.99990014918806e-06, "loss": 17.9621, "step": 1749 }, { "epoch": 0.03198859378141737, "grad_norm": 7.817423589544294, "learning_rate": 9.999898269690972e-06, "loss": 18.0841, "step": 1750 }, { "epoch": 0.03200687297786389, "grad_norm": 9.061676820807548, "learning_rate": 9.999896372669675e-06, "loss": 18.3327, "step": 1751 }, { "epoch": 0.032025152174310414, "grad_norm": 8.171012294543834, "learning_rate": 9.999894458124176e-06, "loss": 18.2276, "step": 1752 }, { "epoch": 0.03204343137075694, "grad_norm": 8.81443248428237, "learning_rate": 9.99989252605448e-06, "loss": 18.2164, "step": 1753 }, { "epoch": 0.032061710567203465, "grad_norm": 8.244576239047426, "learning_rate": 9.999890576460593e-06, "loss": 18.0588, "step": 1754 }, { "epoch": 0.03207998976364999, "grad_norm": 7.500940593794085, "learning_rate": 9.999888609342523e-06, "loss": 17.9209, "step": 1755 }, { "epoch": 0.032098268960096515, "grad_norm": 8.445595785573039, "learning_rate": 9.99988662470028e-06, "loss": 18.0948, "step": 1756 }, { "epoch": 0.03211654815654304, "grad_norm": 8.781581898387014, "learning_rate": 9.999884622533866e-06, "loss": 18.289, "step": 1757 }, { "epoch": 0.032134827352989566, "grad_norm": 8.653088007149135, "learning_rate": 9.999882602843292e-06, "loss": 18.0247, "step": 1758 }, { "epoch": 0.03215310654943609, "grad_norm": 8.900805886681988, "learning_rate": 9.999880565628564e-06, "loss": 18.0382, "step": 1759 }, { "epoch": 0.03217138574588261, "grad_norm": 6.761814575284438, "learning_rate": 9.999878510889686e-06, "loss": 17.549, "step": 1760 }, { "epoch": 0.03218966494232914, "grad_norm": 8.382118165256745, "learning_rate": 9.999876438626669e-06, "loss": 18.1429, "step": 1761 }, { "epoch": 0.03220794413877566, "grad_norm": 8.748807697101075, "learning_rate": 9.99987434883952e-06, "loss": 17.8109, "step": 1762 }, { "epoch": 0.03222622333522218, "grad_norm": 8.559930526855778, "learning_rate": 9.999872241528244e-06, "loss": 18.5522, "step": 1763 }, { "epoch": 0.03224450253166871, "grad_norm": 7.741650185924381, "learning_rate": 9.99987011669285e-06, "loss": 17.8154, "step": 1764 }, { "epoch": 0.03226278172811523, "grad_norm": 6.293094129356059, "learning_rate": 9.999867974333345e-06, "loss": 17.4139, "step": 1765 }, { "epoch": 0.032281060924561754, "grad_norm": 7.907972598846681, "learning_rate": 9.999865814449734e-06, "loss": 18.1276, "step": 1766 }, { "epoch": 0.03229934012100828, "grad_norm": 8.237264405705627, "learning_rate": 9.99986363704203e-06, "loss": 18.3052, "step": 1767 }, { "epoch": 0.032317619317454804, "grad_norm": 9.82774985313453, "learning_rate": 9.999861442110238e-06, "loss": 18.9897, "step": 1768 }, { "epoch": 0.032335898513901326, "grad_norm": 6.961508335763593, "learning_rate": 9.999859229654364e-06, "loss": 17.55, "step": 1769 }, { "epoch": 0.032354177710347855, "grad_norm": 7.384652934661802, "learning_rate": 9.99985699967442e-06, "loss": 17.5297, "step": 1770 }, { "epoch": 0.03237245690679438, "grad_norm": 7.411518265275107, "learning_rate": 9.999854752170409e-06, "loss": 17.6561, "step": 1771 }, { "epoch": 0.0323907361032409, "grad_norm": 9.123059934482233, "learning_rate": 9.99985248714234e-06, "loss": 18.4026, "step": 1772 }, { "epoch": 0.03240901529968743, "grad_norm": 8.417191475521836, "learning_rate": 9.999850204590224e-06, "loss": 17.8137, "step": 1773 }, { "epoch": 0.03242729449613395, "grad_norm": 10.61543485670314, "learning_rate": 9.999847904514066e-06, "loss": 18.8098, "step": 1774 }, { "epoch": 0.03244557369258048, "grad_norm": 8.532504542166224, "learning_rate": 9.999845586913876e-06, "loss": 18.3173, "step": 1775 }, { "epoch": 0.032463852889027, "grad_norm": 7.43589045622056, "learning_rate": 9.999843251789659e-06, "loss": 17.5071, "step": 1776 }, { "epoch": 0.03248213208547352, "grad_norm": 8.617687971638343, "learning_rate": 9.999840899141426e-06, "loss": 18.0293, "step": 1777 }, { "epoch": 0.03250041128192005, "grad_norm": 6.778603432440775, "learning_rate": 9.999838528969186e-06, "loss": 17.6333, "step": 1778 }, { "epoch": 0.03251869047836657, "grad_norm": 9.147403305663513, "learning_rate": 9.999836141272945e-06, "loss": 18.2796, "step": 1779 }, { "epoch": 0.03253696967481309, "grad_norm": 8.132127085631549, "learning_rate": 9.99983373605271e-06, "loss": 17.8568, "step": 1780 }, { "epoch": 0.03255524887125962, "grad_norm": 7.365472948844682, "learning_rate": 9.999831313308495e-06, "loss": 17.6739, "step": 1781 }, { "epoch": 0.032573528067706144, "grad_norm": 8.497651957313906, "learning_rate": 9.999828873040303e-06, "loss": 18.2875, "step": 1782 }, { "epoch": 0.032591807264152665, "grad_norm": 8.484429761607142, "learning_rate": 9.999826415248146e-06, "loss": 18.1602, "step": 1783 }, { "epoch": 0.032610086460599194, "grad_norm": 9.548530803029397, "learning_rate": 9.999823939932031e-06, "loss": 19.0218, "step": 1784 }, { "epoch": 0.032628365657045716, "grad_norm": 6.485042818984111, "learning_rate": 9.999821447091967e-06, "loss": 17.4927, "step": 1785 }, { "epoch": 0.03264664485349224, "grad_norm": 8.360808490797504, "learning_rate": 9.999818936727963e-06, "loss": 18.4266, "step": 1786 }, { "epoch": 0.032664924049938766, "grad_norm": 6.781187397765794, "learning_rate": 9.999816408840024e-06, "loss": 17.5942, "step": 1787 }, { "epoch": 0.03268320324638529, "grad_norm": 6.312570278027369, "learning_rate": 9.999813863428167e-06, "loss": 17.3115, "step": 1788 }, { "epoch": 0.03270148244283181, "grad_norm": 8.02380292866149, "learning_rate": 9.999811300492394e-06, "loss": 17.8317, "step": 1789 }, { "epoch": 0.03271976163927834, "grad_norm": 9.2842251146869, "learning_rate": 9.999808720032717e-06, "loss": 18.6453, "step": 1790 }, { "epoch": 0.03273804083572486, "grad_norm": 7.710705644639956, "learning_rate": 9.999806122049144e-06, "loss": 17.9747, "step": 1791 }, { "epoch": 0.03275632003217139, "grad_norm": 7.297062474874555, "learning_rate": 9.999803506541683e-06, "loss": 17.7699, "step": 1792 }, { "epoch": 0.03277459922861791, "grad_norm": 7.1899855935789505, "learning_rate": 9.999800873510347e-06, "loss": 17.5291, "step": 1793 }, { "epoch": 0.03279287842506443, "grad_norm": 7.666647090498624, "learning_rate": 9.999798222955142e-06, "loss": 18.088, "step": 1794 }, { "epoch": 0.03281115762151096, "grad_norm": 8.884884975344518, "learning_rate": 9.999795554876078e-06, "loss": 18.2303, "step": 1795 }, { "epoch": 0.03282943681795748, "grad_norm": 7.208892860970315, "learning_rate": 9.999792869273165e-06, "loss": 17.2642, "step": 1796 }, { "epoch": 0.032847716014404005, "grad_norm": 6.873435544437457, "learning_rate": 9.99979016614641e-06, "loss": 17.6393, "step": 1797 }, { "epoch": 0.032865995210850533, "grad_norm": 8.38149535209502, "learning_rate": 9.999787445495825e-06, "loss": 18.1904, "step": 1798 }, { "epoch": 0.032884274407297055, "grad_norm": 7.783435755971429, "learning_rate": 9.999784707321419e-06, "loss": 18.0207, "step": 1799 }, { "epoch": 0.03290255360374358, "grad_norm": 8.311374471647397, "learning_rate": 9.999781951623202e-06, "loss": 18.3539, "step": 1800 }, { "epoch": 0.032920832800190106, "grad_norm": 8.174072765469017, "learning_rate": 9.999779178401183e-06, "loss": 17.9323, "step": 1801 }, { "epoch": 0.03293911199663663, "grad_norm": 7.232056843641817, "learning_rate": 9.999776387655372e-06, "loss": 17.8261, "step": 1802 }, { "epoch": 0.03295739119308315, "grad_norm": 8.218568405365268, "learning_rate": 9.999773579385779e-06, "loss": 18.2206, "step": 1803 }, { "epoch": 0.03297567038952968, "grad_norm": 8.093855359353427, "learning_rate": 9.999770753592413e-06, "loss": 18.1971, "step": 1804 }, { "epoch": 0.0329939495859762, "grad_norm": 8.901442169351329, "learning_rate": 9.999767910275283e-06, "loss": 18.1861, "step": 1805 }, { "epoch": 0.03301222878242272, "grad_norm": 7.670484431715468, "learning_rate": 9.999765049434403e-06, "loss": 17.7351, "step": 1806 }, { "epoch": 0.03303050797886925, "grad_norm": 8.876821800472102, "learning_rate": 9.999762171069777e-06, "loss": 18.2117, "step": 1807 }, { "epoch": 0.03304878717531577, "grad_norm": 7.313675489864291, "learning_rate": 9.999759275181421e-06, "loss": 17.8017, "step": 1808 }, { "epoch": 0.0330670663717623, "grad_norm": 8.396988203693653, "learning_rate": 9.999756361769342e-06, "loss": 18.074, "step": 1809 }, { "epoch": 0.03308534556820882, "grad_norm": 7.325734865195764, "learning_rate": 9.99975343083355e-06, "loss": 17.8234, "step": 1810 }, { "epoch": 0.033103624764655344, "grad_norm": 7.32385429488872, "learning_rate": 9.999750482374057e-06, "loss": 17.6628, "step": 1811 }, { "epoch": 0.03312190396110187, "grad_norm": 7.584415550741578, "learning_rate": 9.999747516390872e-06, "loss": 17.8922, "step": 1812 }, { "epoch": 0.033140183157548395, "grad_norm": 6.8395529500348164, "learning_rate": 9.999744532884006e-06, "loss": 17.7227, "step": 1813 }, { "epoch": 0.033158462353994916, "grad_norm": 7.67886942130527, "learning_rate": 9.999741531853469e-06, "loss": 17.9991, "step": 1814 }, { "epoch": 0.033176741550441445, "grad_norm": 7.686021597942992, "learning_rate": 9.999738513299273e-06, "loss": 18.0536, "step": 1815 }, { "epoch": 0.03319502074688797, "grad_norm": 7.563094170574433, "learning_rate": 9.999735477221426e-06, "loss": 17.8883, "step": 1816 }, { "epoch": 0.03321329994333449, "grad_norm": 6.360992366183429, "learning_rate": 9.999732423619941e-06, "loss": 17.331, "step": 1817 }, { "epoch": 0.03323157913978102, "grad_norm": 6.9729700691565535, "learning_rate": 9.999729352494827e-06, "loss": 17.9784, "step": 1818 }, { "epoch": 0.03324985833622754, "grad_norm": 8.680611695987398, "learning_rate": 9.999726263846096e-06, "loss": 18.2799, "step": 1819 }, { "epoch": 0.03326813753267406, "grad_norm": 6.483530997935041, "learning_rate": 9.999723157673758e-06, "loss": 17.3454, "step": 1820 }, { "epoch": 0.03328641672912059, "grad_norm": 7.496250705900263, "learning_rate": 9.999720033977824e-06, "loss": 17.8439, "step": 1821 }, { "epoch": 0.03330469592556711, "grad_norm": 7.462704294828384, "learning_rate": 9.999716892758305e-06, "loss": 17.7355, "step": 1822 }, { "epoch": 0.03332297512201363, "grad_norm": 9.644458381912422, "learning_rate": 9.999713734015212e-06, "loss": 18.4893, "step": 1823 }, { "epoch": 0.03334125431846016, "grad_norm": 7.666527601067586, "learning_rate": 9.999710557748557e-06, "loss": 17.9815, "step": 1824 }, { "epoch": 0.033359533514906684, "grad_norm": 7.864692354523234, "learning_rate": 9.99970736395835e-06, "loss": 17.9709, "step": 1825 }, { "epoch": 0.03337781271135321, "grad_norm": 8.264337407873395, "learning_rate": 9.999704152644603e-06, "loss": 17.7038, "step": 1826 }, { "epoch": 0.033396091907799734, "grad_norm": 7.969120135020211, "learning_rate": 9.999700923807326e-06, "loss": 18.1438, "step": 1827 }, { "epoch": 0.033414371104246256, "grad_norm": 8.931013694821827, "learning_rate": 9.999697677446531e-06, "loss": 18.0319, "step": 1828 }, { "epoch": 0.033432650300692784, "grad_norm": 6.194512930888271, "learning_rate": 9.999694413562231e-06, "loss": 17.2692, "step": 1829 }, { "epoch": 0.033450929497139306, "grad_norm": 7.971082112958304, "learning_rate": 9.999691132154435e-06, "loss": 17.8576, "step": 1830 }, { "epoch": 0.03346920869358583, "grad_norm": 9.431769907366288, "learning_rate": 9.999687833223155e-06, "loss": 18.2726, "step": 1831 }, { "epoch": 0.03348748789003236, "grad_norm": 8.729898612350182, "learning_rate": 9.999684516768402e-06, "loss": 18.335, "step": 1832 }, { "epoch": 0.03350576708647888, "grad_norm": 7.8105456031618505, "learning_rate": 9.999681182790191e-06, "loss": 18.1409, "step": 1833 }, { "epoch": 0.0335240462829254, "grad_norm": 7.747628228044708, "learning_rate": 9.99967783128853e-06, "loss": 17.727, "step": 1834 }, { "epoch": 0.03354232547937193, "grad_norm": 6.45043338847289, "learning_rate": 9.999674462263434e-06, "loss": 17.2659, "step": 1835 }, { "epoch": 0.03356060467581845, "grad_norm": 8.008648065130282, "learning_rate": 9.999671075714909e-06, "loss": 17.9419, "step": 1836 }, { "epoch": 0.03357888387226497, "grad_norm": 8.439711355653493, "learning_rate": 9.999667671642975e-06, "loss": 17.9714, "step": 1837 }, { "epoch": 0.0335971630687115, "grad_norm": 8.097927086887031, "learning_rate": 9.999664250047636e-06, "loss": 18.2587, "step": 1838 }, { "epoch": 0.03361544226515802, "grad_norm": 6.714611037306719, "learning_rate": 9.99966081092891e-06, "loss": 17.5767, "step": 1839 }, { "epoch": 0.033633721461604545, "grad_norm": 6.919967731204464, "learning_rate": 9.999657354286806e-06, "loss": 17.6317, "step": 1840 }, { "epoch": 0.03365200065805107, "grad_norm": 8.150508868141921, "learning_rate": 9.999653880121336e-06, "loss": 18.2641, "step": 1841 }, { "epoch": 0.033670279854497595, "grad_norm": 7.5496710279480155, "learning_rate": 9.999650388432513e-06, "loss": 18.2013, "step": 1842 }, { "epoch": 0.033688559050944124, "grad_norm": 7.531807395143053, "learning_rate": 9.99964687922035e-06, "loss": 17.5703, "step": 1843 }, { "epoch": 0.033706838247390646, "grad_norm": 8.08451813867511, "learning_rate": 9.99964335248486e-06, "loss": 18.0743, "step": 1844 }, { "epoch": 0.03372511744383717, "grad_norm": 7.861686401413494, "learning_rate": 9.999639808226051e-06, "loss": 18.0132, "step": 1845 }, { "epoch": 0.033743396640283696, "grad_norm": 9.898916711748324, "learning_rate": 9.999636246443941e-06, "loss": 18.6431, "step": 1846 }, { "epoch": 0.03376167583673022, "grad_norm": 7.855318071091838, "learning_rate": 9.999632667138539e-06, "loss": 17.876, "step": 1847 }, { "epoch": 0.03377995503317674, "grad_norm": 7.994621814215297, "learning_rate": 9.999629070309858e-06, "loss": 18.2559, "step": 1848 }, { "epoch": 0.03379823422962327, "grad_norm": 8.328924256453455, "learning_rate": 9.999625455957912e-06, "loss": 18.039, "step": 1849 }, { "epoch": 0.03381651342606979, "grad_norm": 7.423442934416348, "learning_rate": 9.99962182408271e-06, "loss": 17.8073, "step": 1850 }, { "epoch": 0.03383479262251631, "grad_norm": 8.670113783939007, "learning_rate": 9.99961817468427e-06, "loss": 18.0425, "step": 1851 }, { "epoch": 0.03385307181896284, "grad_norm": 8.341152385144946, "learning_rate": 9.9996145077626e-06, "loss": 18.3054, "step": 1852 }, { "epoch": 0.03387135101540936, "grad_norm": 6.040590275864804, "learning_rate": 9.999610823317716e-06, "loss": 17.2615, "step": 1853 }, { "epoch": 0.033889630211855884, "grad_norm": 6.908992376375361, "learning_rate": 9.99960712134963e-06, "loss": 17.6936, "step": 1854 }, { "epoch": 0.03390790940830241, "grad_norm": 7.7583741758822695, "learning_rate": 9.999603401858354e-06, "loss": 17.8515, "step": 1855 }, { "epoch": 0.033926188604748934, "grad_norm": 7.975857615183032, "learning_rate": 9.999599664843903e-06, "loss": 18.2947, "step": 1856 }, { "epoch": 0.033944467801195456, "grad_norm": 9.107143634269054, "learning_rate": 9.99959591030629e-06, "loss": 18.2875, "step": 1857 }, { "epoch": 0.033962746997641985, "grad_norm": 8.885538749469587, "learning_rate": 9.999592138245524e-06, "loss": 18.4399, "step": 1858 }, { "epoch": 0.03398102619408851, "grad_norm": 7.619089325646182, "learning_rate": 9.999588348661625e-06, "loss": 18.0025, "step": 1859 }, { "epoch": 0.033999305390535035, "grad_norm": 7.923948501979195, "learning_rate": 9.9995845415546e-06, "loss": 17.952, "step": 1860 }, { "epoch": 0.03401758458698156, "grad_norm": 7.556172395801596, "learning_rate": 9.999580716924467e-06, "loss": 17.8152, "step": 1861 }, { "epoch": 0.03403586378342808, "grad_norm": 7.908539024199279, "learning_rate": 9.999576874771236e-06, "loss": 18.2262, "step": 1862 }, { "epoch": 0.03405414297987461, "grad_norm": 7.590176741000986, "learning_rate": 9.999573015094921e-06, "loss": 17.5107, "step": 1863 }, { "epoch": 0.03407242217632113, "grad_norm": 10.501502046595387, "learning_rate": 9.99956913789554e-06, "loss": 19.0452, "step": 1864 }, { "epoch": 0.03409070137276765, "grad_norm": 8.724630148307154, "learning_rate": 9.999565243173099e-06, "loss": 18.1187, "step": 1865 }, { "epoch": 0.03410898056921418, "grad_norm": 6.192883744104812, "learning_rate": 9.999561330927619e-06, "loss": 17.2361, "step": 1866 }, { "epoch": 0.0341272597656607, "grad_norm": 7.32055343370581, "learning_rate": 9.999557401159107e-06, "loss": 18.113, "step": 1867 }, { "epoch": 0.03414553896210722, "grad_norm": 7.015490518486996, "learning_rate": 9.999553453867583e-06, "loss": 17.7648, "step": 1868 }, { "epoch": 0.03416381815855375, "grad_norm": 7.479288524521664, "learning_rate": 9.999549489053056e-06, "loss": 17.6716, "step": 1869 }, { "epoch": 0.034182097355000274, "grad_norm": 9.573109660956048, "learning_rate": 9.999545506715544e-06, "loss": 18.5602, "step": 1870 }, { "epoch": 0.034200376551446796, "grad_norm": 7.398144103675194, "learning_rate": 9.999541506855058e-06, "loss": 17.7404, "step": 1871 }, { "epoch": 0.034218655747893324, "grad_norm": 7.95634507515605, "learning_rate": 9.999537489471612e-06, "loss": 18.0822, "step": 1872 }, { "epoch": 0.034236934944339846, "grad_norm": 7.556869318283255, "learning_rate": 9.999533454565222e-06, "loss": 17.8391, "step": 1873 }, { "epoch": 0.03425521414078637, "grad_norm": 7.993039144663855, "learning_rate": 9.999529402135899e-06, "loss": 18.0815, "step": 1874 }, { "epoch": 0.034273493337232896, "grad_norm": 8.544069018131108, "learning_rate": 9.999525332183662e-06, "loss": 18.4803, "step": 1875 }, { "epoch": 0.03429177253367942, "grad_norm": 9.479112947375867, "learning_rate": 9.99952124470852e-06, "loss": 18.4248, "step": 1876 }, { "epoch": 0.03431005173012595, "grad_norm": 7.197687460803949, "learning_rate": 9.999517139710493e-06, "loss": 17.9663, "step": 1877 }, { "epoch": 0.03432833092657247, "grad_norm": 8.248143550009049, "learning_rate": 9.99951301718959e-06, "loss": 18.4598, "step": 1878 }, { "epoch": 0.03434661012301899, "grad_norm": 8.299650311859507, "learning_rate": 9.999508877145827e-06, "loss": 18.1611, "step": 1879 }, { "epoch": 0.03436488931946552, "grad_norm": 7.568158080890924, "learning_rate": 9.999504719579221e-06, "loss": 17.8868, "step": 1880 }, { "epoch": 0.03438316851591204, "grad_norm": 7.415992966366197, "learning_rate": 9.999500544489785e-06, "loss": 17.7056, "step": 1881 }, { "epoch": 0.03440144771235856, "grad_norm": 8.916900753766164, "learning_rate": 9.999496351877533e-06, "loss": 18.3458, "step": 1882 }, { "epoch": 0.03441972690880509, "grad_norm": 8.935175191478718, "learning_rate": 9.99949214174248e-06, "loss": 18.6321, "step": 1883 }, { "epoch": 0.03443800610525161, "grad_norm": 7.722295304191317, "learning_rate": 9.99948791408464e-06, "loss": 17.9493, "step": 1884 }, { "epoch": 0.034456285301698135, "grad_norm": 8.007541693731353, "learning_rate": 9.999483668904029e-06, "loss": 18.2557, "step": 1885 }, { "epoch": 0.034474564498144664, "grad_norm": 6.611765754812756, "learning_rate": 9.999479406200663e-06, "loss": 17.3192, "step": 1886 }, { "epoch": 0.034492843694591185, "grad_norm": 6.3385108608479985, "learning_rate": 9.999475125974553e-06, "loss": 17.2715, "step": 1887 }, { "epoch": 0.03451112289103771, "grad_norm": 7.520879210002364, "learning_rate": 9.999470828225718e-06, "loss": 17.829, "step": 1888 }, { "epoch": 0.034529402087484236, "grad_norm": 7.494377127553713, "learning_rate": 9.999466512954173e-06, "loss": 17.7672, "step": 1889 }, { "epoch": 0.03454768128393076, "grad_norm": 7.763998443291807, "learning_rate": 9.99946218015993e-06, "loss": 17.8187, "step": 1890 }, { "epoch": 0.03456596048037728, "grad_norm": 7.289771091936282, "learning_rate": 9.999457829843005e-06, "loss": 17.796, "step": 1891 }, { "epoch": 0.03458423967682381, "grad_norm": 6.623575404079241, "learning_rate": 9.999453462003417e-06, "loss": 17.4244, "step": 1892 }, { "epoch": 0.03460251887327033, "grad_norm": 10.017125767421753, "learning_rate": 9.999449076641176e-06, "loss": 18.8644, "step": 1893 }, { "epoch": 0.03462079806971686, "grad_norm": 8.20371403655816, "learning_rate": 9.9994446737563e-06, "loss": 18.0538, "step": 1894 }, { "epoch": 0.03463907726616338, "grad_norm": 9.802161995721683, "learning_rate": 9.999440253348805e-06, "loss": 18.7308, "step": 1895 }, { "epoch": 0.0346573564626099, "grad_norm": 8.322224215761992, "learning_rate": 9.999435815418705e-06, "loss": 18.3015, "step": 1896 }, { "epoch": 0.03467563565905643, "grad_norm": 8.148581700632096, "learning_rate": 9.999431359966017e-06, "loss": 17.8139, "step": 1897 }, { "epoch": 0.03469391485550295, "grad_norm": 8.92816337345986, "learning_rate": 9.999426886990758e-06, "loss": 18.0394, "step": 1898 }, { "epoch": 0.034712194051949474, "grad_norm": 8.825948598919227, "learning_rate": 9.999422396492937e-06, "loss": 18.4764, "step": 1899 }, { "epoch": 0.034730473248396, "grad_norm": 8.627968631064658, "learning_rate": 9.999417888472578e-06, "loss": 17.8798, "step": 1900 }, { "epoch": 0.034748752444842525, "grad_norm": 7.6009155117931675, "learning_rate": 9.999413362929691e-06, "loss": 18.207, "step": 1901 }, { "epoch": 0.034767031641289046, "grad_norm": 7.870549136132222, "learning_rate": 9.999408819864296e-06, "loss": 17.9298, "step": 1902 }, { "epoch": 0.034785310837735575, "grad_norm": 6.961455876093974, "learning_rate": 9.999404259276404e-06, "loss": 17.6394, "step": 1903 }, { "epoch": 0.0348035900341821, "grad_norm": 7.456431814896433, "learning_rate": 9.999399681166036e-06, "loss": 18.1099, "step": 1904 }, { "epoch": 0.03482186923062862, "grad_norm": 7.428766418167107, "learning_rate": 9.999395085533205e-06, "loss": 17.8307, "step": 1905 }, { "epoch": 0.03484014842707515, "grad_norm": 8.163133227231434, "learning_rate": 9.99939047237793e-06, "loss": 18.1721, "step": 1906 }, { "epoch": 0.03485842762352167, "grad_norm": 6.325296098614813, "learning_rate": 9.999385841700224e-06, "loss": 17.3985, "step": 1907 }, { "epoch": 0.03487670681996819, "grad_norm": 8.425185552349872, "learning_rate": 9.999381193500104e-06, "loss": 18.0274, "step": 1908 }, { "epoch": 0.03489498601641472, "grad_norm": 8.931619125908387, "learning_rate": 9.999376527777587e-06, "loss": 18.5191, "step": 1909 }, { "epoch": 0.03491326521286124, "grad_norm": 7.408835504737836, "learning_rate": 9.999371844532689e-06, "loss": 17.9273, "step": 1910 }, { "epoch": 0.03493154440930777, "grad_norm": 7.623356726961149, "learning_rate": 9.999367143765428e-06, "loss": 18.272, "step": 1911 }, { "epoch": 0.03494982360575429, "grad_norm": 7.95269138874238, "learning_rate": 9.999362425475817e-06, "loss": 17.9312, "step": 1912 }, { "epoch": 0.034968102802200814, "grad_norm": 7.462108851980732, "learning_rate": 9.999357689663875e-06, "loss": 17.7149, "step": 1913 }, { "epoch": 0.03498638199864734, "grad_norm": 7.283136697952984, "learning_rate": 9.999352936329619e-06, "loss": 17.6428, "step": 1914 }, { "epoch": 0.035004661195093864, "grad_norm": 8.192691586643772, "learning_rate": 9.999348165473064e-06, "loss": 18.2675, "step": 1915 }, { "epoch": 0.035022940391540386, "grad_norm": 8.498990045712594, "learning_rate": 9.999343377094227e-06, "loss": 18.2104, "step": 1916 }, { "epoch": 0.035041219587986915, "grad_norm": 8.765368366785685, "learning_rate": 9.999338571193126e-06, "loss": 17.692, "step": 1917 }, { "epoch": 0.035059498784433436, "grad_norm": 8.098246273684264, "learning_rate": 9.999333747769777e-06, "loss": 18.1488, "step": 1918 }, { "epoch": 0.03507777798087996, "grad_norm": 6.413145284090817, "learning_rate": 9.999328906824198e-06, "loss": 17.4056, "step": 1919 }, { "epoch": 0.03509605717732649, "grad_norm": 7.89135265827549, "learning_rate": 9.999324048356403e-06, "loss": 17.845, "step": 1920 }, { "epoch": 0.03511433637377301, "grad_norm": 7.709013863430543, "learning_rate": 9.999319172366412e-06, "loss": 17.934, "step": 1921 }, { "epoch": 0.03513261557021953, "grad_norm": 8.564207535733933, "learning_rate": 9.999314278854242e-06, "loss": 18.1819, "step": 1922 }, { "epoch": 0.03515089476666606, "grad_norm": 7.149263373420875, "learning_rate": 9.999309367819907e-06, "loss": 17.7353, "step": 1923 }, { "epoch": 0.03516917396311258, "grad_norm": 8.18832302643092, "learning_rate": 9.999304439263428e-06, "loss": 18.0075, "step": 1924 }, { "epoch": 0.0351874531595591, "grad_norm": 8.182007369919635, "learning_rate": 9.999299493184822e-06, "loss": 18.1542, "step": 1925 }, { "epoch": 0.03520573235600563, "grad_norm": 9.944147453652565, "learning_rate": 9.999294529584102e-06, "loss": 18.7217, "step": 1926 }, { "epoch": 0.03522401155245215, "grad_norm": 7.885567662621238, "learning_rate": 9.999289548461292e-06, "loss": 18.1261, "step": 1927 }, { "epoch": 0.03524229074889868, "grad_norm": 7.620667758005586, "learning_rate": 9.999284549816403e-06, "loss": 17.7779, "step": 1928 }, { "epoch": 0.0352605699453452, "grad_norm": 9.852841139533853, "learning_rate": 9.999279533649458e-06, "loss": 18.6092, "step": 1929 }, { "epoch": 0.035278849141791725, "grad_norm": 7.928579728197884, "learning_rate": 9.99927449996047e-06, "loss": 17.9599, "step": 1930 }, { "epoch": 0.035297128338238254, "grad_norm": 8.184321980595112, "learning_rate": 9.999269448749461e-06, "loss": 18.1889, "step": 1931 }, { "epoch": 0.035315407534684776, "grad_norm": 8.402935327668816, "learning_rate": 9.999264380016444e-06, "loss": 18.3697, "step": 1932 }, { "epoch": 0.0353336867311313, "grad_norm": 9.086470670696299, "learning_rate": 9.99925929376144e-06, "loss": 18.5396, "step": 1933 }, { "epoch": 0.035351965927577826, "grad_norm": 8.490461255836463, "learning_rate": 9.999254189984466e-06, "loss": 18.2341, "step": 1934 }, { "epoch": 0.03537024512402435, "grad_norm": 7.094884201354597, "learning_rate": 9.999249068685539e-06, "loss": 17.5918, "step": 1935 }, { "epoch": 0.03538852432047087, "grad_norm": 8.499136896972471, "learning_rate": 9.999243929864679e-06, "loss": 18.1365, "step": 1936 }, { "epoch": 0.0354068035169174, "grad_norm": 7.535927532248468, "learning_rate": 9.999238773521902e-06, "loss": 17.9605, "step": 1937 }, { "epoch": 0.03542508271336392, "grad_norm": 8.389999966836863, "learning_rate": 9.999233599657228e-06, "loss": 17.7126, "step": 1938 }, { "epoch": 0.03544336190981044, "grad_norm": 8.287982338740152, "learning_rate": 9.999228408270674e-06, "loss": 18.2419, "step": 1939 }, { "epoch": 0.03546164110625697, "grad_norm": 7.9503520158933165, "learning_rate": 9.999223199362257e-06, "loss": 18.0246, "step": 1940 }, { "epoch": 0.03547992030270349, "grad_norm": 9.21418739958329, "learning_rate": 9.999217972931998e-06, "loss": 18.7629, "step": 1941 }, { "epoch": 0.035498199499150014, "grad_norm": 7.782900053670787, "learning_rate": 9.999212728979912e-06, "loss": 17.839, "step": 1942 }, { "epoch": 0.03551647869559654, "grad_norm": 8.143164639304983, "learning_rate": 9.999207467506022e-06, "loss": 18.0542, "step": 1943 }, { "epoch": 0.035534757892043065, "grad_norm": 7.389277482561079, "learning_rate": 9.999202188510341e-06, "loss": 17.6361, "step": 1944 }, { "epoch": 0.03555303708848959, "grad_norm": 10.202846117878034, "learning_rate": 9.999196891992892e-06, "loss": 18.7116, "step": 1945 }, { "epoch": 0.035571316284936115, "grad_norm": 7.494605340684641, "learning_rate": 9.999191577953692e-06, "loss": 18.011, "step": 1946 }, { "epoch": 0.03558959548138264, "grad_norm": 8.38155763029932, "learning_rate": 9.999186246392756e-06, "loss": 18.3662, "step": 1947 }, { "epoch": 0.035607874677829165, "grad_norm": 7.850468999216842, "learning_rate": 9.999180897310108e-06, "loss": 17.9277, "step": 1948 }, { "epoch": 0.03562615387427569, "grad_norm": 7.2770297355717615, "learning_rate": 9.999175530705765e-06, "loss": 17.8763, "step": 1949 }, { "epoch": 0.03564443307072221, "grad_norm": 7.497010366244362, "learning_rate": 9.999170146579746e-06, "loss": 17.9092, "step": 1950 }, { "epoch": 0.03566271226716874, "grad_norm": 7.147592489045447, "learning_rate": 9.999164744932069e-06, "loss": 17.5548, "step": 1951 }, { "epoch": 0.03568099146361526, "grad_norm": 7.1198846531257995, "learning_rate": 9.999159325762753e-06, "loss": 17.7998, "step": 1952 }, { "epoch": 0.03569927066006178, "grad_norm": 8.128197845288225, "learning_rate": 9.999153889071818e-06, "loss": 17.9989, "step": 1953 }, { "epoch": 0.03571754985650831, "grad_norm": 7.423488993866567, "learning_rate": 9.999148434859282e-06, "loss": 17.7657, "step": 1954 }, { "epoch": 0.03573582905295483, "grad_norm": 9.400429043325508, "learning_rate": 9.999142963125164e-06, "loss": 18.5479, "step": 1955 }, { "epoch": 0.03575410824940135, "grad_norm": 10.386644570380422, "learning_rate": 9.999137473869484e-06, "loss": 18.5998, "step": 1956 }, { "epoch": 0.03577238744584788, "grad_norm": 7.552400237830502, "learning_rate": 9.999131967092262e-06, "loss": 17.7759, "step": 1957 }, { "epoch": 0.035790666642294404, "grad_norm": 7.230520694917503, "learning_rate": 9.999126442793515e-06, "loss": 17.8236, "step": 1958 }, { "epoch": 0.035808945838740926, "grad_norm": 9.240852364016215, "learning_rate": 9.999120900973264e-06, "loss": 18.3381, "step": 1959 }, { "epoch": 0.035827225035187454, "grad_norm": 8.479502318693042, "learning_rate": 9.999115341631528e-06, "loss": 18.5659, "step": 1960 }, { "epoch": 0.035845504231633976, "grad_norm": 8.255809598397338, "learning_rate": 9.999109764768328e-06, "loss": 17.9439, "step": 1961 }, { "epoch": 0.035863783428080505, "grad_norm": 7.10311904635282, "learning_rate": 9.99910417038368e-06, "loss": 17.5031, "step": 1962 }, { "epoch": 0.03588206262452703, "grad_norm": 8.354939519427754, "learning_rate": 9.999098558477606e-06, "loss": 18.2835, "step": 1963 }, { "epoch": 0.03590034182097355, "grad_norm": 8.169834269388787, "learning_rate": 9.999092929050126e-06, "loss": 18.0539, "step": 1964 }, { "epoch": 0.03591862101742008, "grad_norm": 7.209724239423868, "learning_rate": 9.99908728210126e-06, "loss": 17.5991, "step": 1965 }, { "epoch": 0.0359369002138666, "grad_norm": 7.6799187576786245, "learning_rate": 9.999081617631026e-06, "loss": 18.0274, "step": 1966 }, { "epoch": 0.03595517941031312, "grad_norm": 8.902126792562187, "learning_rate": 9.999075935639445e-06, "loss": 18.6911, "step": 1967 }, { "epoch": 0.03597345860675965, "grad_norm": 7.673549516548946, "learning_rate": 9.999070236126536e-06, "loss": 18.1461, "step": 1968 }, { "epoch": 0.03599173780320617, "grad_norm": 7.166515808137726, "learning_rate": 9.99906451909232e-06, "loss": 17.4114, "step": 1969 }, { "epoch": 0.03601001699965269, "grad_norm": 7.428286380871508, "learning_rate": 9.999058784536816e-06, "loss": 17.7822, "step": 1970 }, { "epoch": 0.03602829619609922, "grad_norm": 8.27330724470139, "learning_rate": 9.999053032460044e-06, "loss": 17.432, "step": 1971 }, { "epoch": 0.03604657539254574, "grad_norm": 7.744580295743159, "learning_rate": 9.999047262862027e-06, "loss": 17.8798, "step": 1972 }, { "epoch": 0.036064854588992265, "grad_norm": 7.065151922031846, "learning_rate": 9.999041475742783e-06, "loss": 18.016, "step": 1973 }, { "epoch": 0.036083133785438794, "grad_norm": 7.15757239909588, "learning_rate": 9.99903567110233e-06, "loss": 17.8817, "step": 1974 }, { "epoch": 0.036101412981885316, "grad_norm": 8.853898794161905, "learning_rate": 9.999029848940694e-06, "loss": 17.9513, "step": 1975 }, { "epoch": 0.03611969217833184, "grad_norm": 6.892125342950771, "learning_rate": 9.99902400925789e-06, "loss": 17.4626, "step": 1976 }, { "epoch": 0.036137971374778366, "grad_norm": 8.855795305081163, "learning_rate": 9.999018152053942e-06, "loss": 18.453, "step": 1977 }, { "epoch": 0.03615625057122489, "grad_norm": 9.553477180571765, "learning_rate": 9.999012277328868e-06, "loss": 19.0001, "step": 1978 }, { "epoch": 0.036174529767671416, "grad_norm": 7.146861108136913, "learning_rate": 9.99900638508269e-06, "loss": 17.5435, "step": 1979 }, { "epoch": 0.03619280896411794, "grad_norm": 7.083904017713698, "learning_rate": 9.999000475315429e-06, "loss": 17.6269, "step": 1980 }, { "epoch": 0.03621108816056446, "grad_norm": 8.181874569690923, "learning_rate": 9.998994548027106e-06, "loss": 18.339, "step": 1981 }, { "epoch": 0.03622936735701099, "grad_norm": 8.15235850629926, "learning_rate": 9.998988603217738e-06, "loss": 18.3941, "step": 1982 }, { "epoch": 0.03624764655345751, "grad_norm": 8.495091145565938, "learning_rate": 9.998982640887352e-06, "loss": 17.9946, "step": 1983 }, { "epoch": 0.03626592574990403, "grad_norm": 6.4835607043705785, "learning_rate": 9.998976661035964e-06, "loss": 17.6544, "step": 1984 }, { "epoch": 0.03628420494635056, "grad_norm": 8.841987597518289, "learning_rate": 9.998970663663596e-06, "loss": 18.0508, "step": 1985 }, { "epoch": 0.03630248414279708, "grad_norm": 10.217090662048435, "learning_rate": 9.998964648770271e-06, "loss": 18.1618, "step": 1986 }, { "epoch": 0.036320763339243604, "grad_norm": 8.019689206397057, "learning_rate": 9.998958616356006e-06, "loss": 17.9514, "step": 1987 }, { "epoch": 0.03633904253569013, "grad_norm": 6.991445236201103, "learning_rate": 9.998952566420828e-06, "loss": 17.8643, "step": 1988 }, { "epoch": 0.036357321732136655, "grad_norm": 7.789091397301192, "learning_rate": 9.998946498964755e-06, "loss": 18.2038, "step": 1989 }, { "epoch": 0.03637560092858318, "grad_norm": 7.141653228475028, "learning_rate": 9.998940413987805e-06, "loss": 17.8061, "step": 1990 }, { "epoch": 0.036393880125029705, "grad_norm": 8.066331113358645, "learning_rate": 9.998934311490005e-06, "loss": 18.1814, "step": 1991 }, { "epoch": 0.03641215932147623, "grad_norm": 7.335124048620984, "learning_rate": 9.998928191471376e-06, "loss": 17.875, "step": 1992 }, { "epoch": 0.03643043851792275, "grad_norm": 9.112505337222876, "learning_rate": 9.998922053931935e-06, "loss": 18.6293, "step": 1993 }, { "epoch": 0.03644871771436928, "grad_norm": 7.54777861867234, "learning_rate": 9.998915898871705e-06, "loss": 17.8176, "step": 1994 }, { "epoch": 0.0364669969108158, "grad_norm": 8.540294308523698, "learning_rate": 9.998909726290711e-06, "loss": 18.4626, "step": 1995 }, { "epoch": 0.03648527610726233, "grad_norm": 8.571296800654416, "learning_rate": 9.99890353618897e-06, "loss": 18.1324, "step": 1996 }, { "epoch": 0.03650355530370885, "grad_norm": 8.426795245663996, "learning_rate": 9.998897328566506e-06, "loss": 17.8787, "step": 1997 }, { "epoch": 0.03652183450015537, "grad_norm": 7.607679739554118, "learning_rate": 9.998891103423343e-06, "loss": 17.7262, "step": 1998 }, { "epoch": 0.0365401136966019, "grad_norm": 7.353285686355623, "learning_rate": 9.998884860759499e-06, "loss": 17.7066, "step": 1999 }, { "epoch": 0.03655839289304842, "grad_norm": 8.198968849277952, "learning_rate": 9.998878600574998e-06, "loss": 18.0655, "step": 2000 }, { "epoch": 0.036576672089494944, "grad_norm": 7.90908222211619, "learning_rate": 9.998872322869859e-06, "loss": 18.1217, "step": 2001 }, { "epoch": 0.03659495128594147, "grad_norm": 7.434964235402946, "learning_rate": 9.99886602764411e-06, "loss": 17.6969, "step": 2002 }, { "epoch": 0.036613230482387994, "grad_norm": 8.434196775394652, "learning_rate": 9.998859714897765e-06, "loss": 18.1832, "step": 2003 }, { "epoch": 0.036631509678834516, "grad_norm": 7.846211573266152, "learning_rate": 9.998853384630853e-06, "loss": 17.8832, "step": 2004 }, { "epoch": 0.036649788875281045, "grad_norm": 8.75645058921149, "learning_rate": 9.998847036843394e-06, "loss": 18.4057, "step": 2005 }, { "epoch": 0.036668068071727566, "grad_norm": 7.4793447817184955, "learning_rate": 9.998840671535411e-06, "loss": 17.7704, "step": 2006 }, { "epoch": 0.03668634726817409, "grad_norm": 8.060297496152803, "learning_rate": 9.998834288706922e-06, "loss": 17.9751, "step": 2007 }, { "epoch": 0.03670462646462062, "grad_norm": 6.269820340808365, "learning_rate": 9.998827888357956e-06, "loss": 17.3203, "step": 2008 }, { "epoch": 0.03672290566106714, "grad_norm": 7.616782429628068, "learning_rate": 9.998821470488529e-06, "loss": 17.8384, "step": 2009 }, { "epoch": 0.03674118485751366, "grad_norm": 8.33085901188751, "learning_rate": 9.998815035098668e-06, "loss": 18.0717, "step": 2010 }, { "epoch": 0.03675946405396019, "grad_norm": 7.723734714418637, "learning_rate": 9.998808582188393e-06, "loss": 17.8507, "step": 2011 }, { "epoch": 0.03677774325040671, "grad_norm": 9.018866210300486, "learning_rate": 9.998802111757729e-06, "loss": 18.6216, "step": 2012 }, { "epoch": 0.03679602244685324, "grad_norm": 6.885161674366217, "learning_rate": 9.998795623806697e-06, "loss": 17.5822, "step": 2013 }, { "epoch": 0.03681430164329976, "grad_norm": 8.091115820985259, "learning_rate": 9.99878911833532e-06, "loss": 17.9485, "step": 2014 }, { "epoch": 0.03683258083974628, "grad_norm": 6.987997512528637, "learning_rate": 9.998782595343621e-06, "loss": 17.4912, "step": 2015 }, { "epoch": 0.03685086003619281, "grad_norm": 8.222991809959506, "learning_rate": 9.998776054831623e-06, "loss": 18.1029, "step": 2016 }, { "epoch": 0.036869139232639334, "grad_norm": 9.05027801037263, "learning_rate": 9.998769496799347e-06, "loss": 18.2343, "step": 2017 }, { "epoch": 0.036887418429085855, "grad_norm": 9.128431730728373, "learning_rate": 9.99876292124682e-06, "loss": 18.5984, "step": 2018 }, { "epoch": 0.036905697625532384, "grad_norm": 7.954206344639059, "learning_rate": 9.998756328174062e-06, "loss": 18.0505, "step": 2019 }, { "epoch": 0.036923976821978906, "grad_norm": 8.188015996937526, "learning_rate": 9.998749717581097e-06, "loss": 17.894, "step": 2020 }, { "epoch": 0.03694225601842543, "grad_norm": 7.75412895491765, "learning_rate": 9.998743089467949e-06, "loss": 18.148, "step": 2021 }, { "epoch": 0.036960535214871956, "grad_norm": 8.109647665884854, "learning_rate": 9.998736443834637e-06, "loss": 18.3236, "step": 2022 }, { "epoch": 0.03697881441131848, "grad_norm": 7.780509706408551, "learning_rate": 9.99872978068119e-06, "loss": 18.0863, "step": 2023 }, { "epoch": 0.036997093607765, "grad_norm": 9.09458460854067, "learning_rate": 9.998723100007628e-06, "loss": 18.5133, "step": 2024 }, { "epoch": 0.03701537280421153, "grad_norm": 9.863279863855022, "learning_rate": 9.998716401813975e-06, "loss": 19.0626, "step": 2025 }, { "epoch": 0.03703365200065805, "grad_norm": 7.253133859486355, "learning_rate": 9.998709686100256e-06, "loss": 17.9412, "step": 2026 }, { "epoch": 0.03705193119710457, "grad_norm": 7.932175176406323, "learning_rate": 9.998702952866494e-06, "loss": 17.6815, "step": 2027 }, { "epoch": 0.0370702103935511, "grad_norm": 7.339503659937987, "learning_rate": 9.99869620211271e-06, "loss": 17.6606, "step": 2028 }, { "epoch": 0.03708848958999762, "grad_norm": 7.907738127442454, "learning_rate": 9.99868943383893e-06, "loss": 18.2433, "step": 2029 }, { "epoch": 0.03710676878644415, "grad_norm": 7.775746964937671, "learning_rate": 9.998682648045178e-06, "loss": 17.9101, "step": 2030 }, { "epoch": 0.03712504798289067, "grad_norm": 9.04856581963131, "learning_rate": 9.998675844731475e-06, "loss": 18.1439, "step": 2031 }, { "epoch": 0.037143327179337195, "grad_norm": 7.3359385924263485, "learning_rate": 9.99866902389785e-06, "loss": 18.0449, "step": 2032 }, { "epoch": 0.03716160637578372, "grad_norm": 7.519600586027241, "learning_rate": 9.998662185544323e-06, "loss": 17.8224, "step": 2033 }, { "epoch": 0.037179885572230245, "grad_norm": 7.987638704160971, "learning_rate": 9.998655329670918e-06, "loss": 18.2923, "step": 2034 }, { "epoch": 0.03719816476867677, "grad_norm": 7.681158036608714, "learning_rate": 9.998648456277659e-06, "loss": 18.1971, "step": 2035 }, { "epoch": 0.037216443965123296, "grad_norm": 8.12090519344087, "learning_rate": 9.998641565364573e-06, "loss": 18.4449, "step": 2036 }, { "epoch": 0.03723472316156982, "grad_norm": 7.51928492652649, "learning_rate": 9.99863465693168e-06, "loss": 17.6491, "step": 2037 }, { "epoch": 0.03725300235801634, "grad_norm": 7.913103109118726, "learning_rate": 9.998627730979008e-06, "loss": 17.7631, "step": 2038 }, { "epoch": 0.03727128155446287, "grad_norm": 7.669560039298758, "learning_rate": 9.99862078750658e-06, "loss": 17.8085, "step": 2039 }, { "epoch": 0.03728956075090939, "grad_norm": 8.251834017752726, "learning_rate": 9.998613826514418e-06, "loss": 18.243, "step": 2040 }, { "epoch": 0.03730783994735591, "grad_norm": 8.873049721162209, "learning_rate": 9.998606848002548e-06, "loss": 18.4131, "step": 2041 }, { "epoch": 0.03732611914380244, "grad_norm": 7.451580746322862, "learning_rate": 9.998599851970997e-06, "loss": 17.9024, "step": 2042 }, { "epoch": 0.03734439834024896, "grad_norm": 7.697008875890094, "learning_rate": 9.998592838419787e-06, "loss": 18.1986, "step": 2043 }, { "epoch": 0.037362677536695484, "grad_norm": 8.120055913869294, "learning_rate": 9.998585807348942e-06, "loss": 17.8143, "step": 2044 }, { "epoch": 0.03738095673314201, "grad_norm": 8.278267453559662, "learning_rate": 9.998578758758486e-06, "loss": 18.3097, "step": 2045 }, { "epoch": 0.037399235929588534, "grad_norm": 8.153431653087639, "learning_rate": 9.998571692648447e-06, "loss": 18.1901, "step": 2046 }, { "epoch": 0.03741751512603506, "grad_norm": 8.006834892764367, "learning_rate": 9.998564609018848e-06, "loss": 18.4237, "step": 2047 }, { "epoch": 0.037435794322481585, "grad_norm": 8.303060078555337, "learning_rate": 9.998557507869714e-06, "loss": 18.3405, "step": 2048 }, { "epoch": 0.037454073518928106, "grad_norm": 7.2265990118581955, "learning_rate": 9.99855038920107e-06, "loss": 17.6187, "step": 2049 }, { "epoch": 0.037472352715374635, "grad_norm": 8.041556758724703, "learning_rate": 9.998543253012938e-06, "loss": 17.9432, "step": 2050 }, { "epoch": 0.03749063191182116, "grad_norm": 6.991937103045851, "learning_rate": 9.998536099305348e-06, "loss": 17.675, "step": 2051 }, { "epoch": 0.03750891110826768, "grad_norm": 8.035188795438987, "learning_rate": 9.998528928078321e-06, "loss": 18.1265, "step": 2052 }, { "epoch": 0.03752719030471421, "grad_norm": 7.620085867284849, "learning_rate": 9.998521739331886e-06, "loss": 17.9205, "step": 2053 }, { "epoch": 0.03754546950116073, "grad_norm": 9.072221427512684, "learning_rate": 9.998514533066066e-06, "loss": 18.5335, "step": 2054 }, { "epoch": 0.03756374869760725, "grad_norm": 8.568238815700667, "learning_rate": 9.998507309280886e-06, "loss": 18.0401, "step": 2055 }, { "epoch": 0.03758202789405378, "grad_norm": 10.039383540553157, "learning_rate": 9.99850006797637e-06, "loss": 18.831, "step": 2056 }, { "epoch": 0.0376003070905003, "grad_norm": 6.1131761158643245, "learning_rate": 9.998492809152545e-06, "loss": 17.1808, "step": 2057 }, { "epoch": 0.03761858628694682, "grad_norm": 6.7440363014529074, "learning_rate": 9.99848553280944e-06, "loss": 17.7552, "step": 2058 }, { "epoch": 0.03763686548339335, "grad_norm": 6.955143337867716, "learning_rate": 9.998478238947074e-06, "loss": 17.4316, "step": 2059 }, { "epoch": 0.03765514467983987, "grad_norm": 7.28685085214896, "learning_rate": 9.998470927565476e-06, "loss": 17.5236, "step": 2060 }, { "epoch": 0.037673423876286395, "grad_norm": 9.235502043145019, "learning_rate": 9.998463598664669e-06, "loss": 18.3993, "step": 2061 }, { "epoch": 0.037691703072732924, "grad_norm": 8.432838318719247, "learning_rate": 9.998456252244684e-06, "loss": 18.4229, "step": 2062 }, { "epoch": 0.037709982269179446, "grad_norm": 7.339767176483528, "learning_rate": 9.998448888305543e-06, "loss": 17.6698, "step": 2063 }, { "epoch": 0.037728261465625974, "grad_norm": 6.833365341315317, "learning_rate": 9.998441506847271e-06, "loss": 17.5013, "step": 2064 }, { "epoch": 0.037746540662072496, "grad_norm": 6.318341016075187, "learning_rate": 9.998434107869897e-06, "loss": 17.2457, "step": 2065 }, { "epoch": 0.03776481985851902, "grad_norm": 7.97844973845759, "learning_rate": 9.998426691373443e-06, "loss": 18.2731, "step": 2066 }, { "epoch": 0.03778309905496555, "grad_norm": 9.022125937561709, "learning_rate": 9.99841925735794e-06, "loss": 18.7425, "step": 2067 }, { "epoch": 0.03780137825141207, "grad_norm": 8.080430879740227, "learning_rate": 9.99841180582341e-06, "loss": 18.5238, "step": 2068 }, { "epoch": 0.03781965744785859, "grad_norm": 7.334230547063637, "learning_rate": 9.99840433676988e-06, "loss": 17.6445, "step": 2069 }, { "epoch": 0.03783793664430512, "grad_norm": 7.8255129735717155, "learning_rate": 9.998396850197376e-06, "loss": 18.0013, "step": 2070 }, { "epoch": 0.03785621584075164, "grad_norm": 7.146299981961723, "learning_rate": 9.998389346105925e-06, "loss": 17.7126, "step": 2071 }, { "epoch": 0.03787449503719816, "grad_norm": 7.973704356003748, "learning_rate": 9.998381824495556e-06, "loss": 17.6137, "step": 2072 }, { "epoch": 0.03789277423364469, "grad_norm": 8.325580556641624, "learning_rate": 9.998374285366289e-06, "loss": 18.1783, "step": 2073 }, { "epoch": 0.03791105343009121, "grad_norm": 8.501162206117245, "learning_rate": 9.998366728718155e-06, "loss": 18.1392, "step": 2074 }, { "epoch": 0.037929332626537735, "grad_norm": 6.5954947334230365, "learning_rate": 9.998359154551178e-06, "loss": 17.4255, "step": 2075 }, { "epoch": 0.03794761182298426, "grad_norm": 9.095095045655555, "learning_rate": 9.998351562865387e-06, "loss": 18.9657, "step": 2076 }, { "epoch": 0.037965891019430785, "grad_norm": 8.181969869684231, "learning_rate": 9.99834395366081e-06, "loss": 18.2521, "step": 2077 }, { "epoch": 0.03798417021587731, "grad_norm": 7.557685423507036, "learning_rate": 9.998336326937468e-06, "loss": 17.8126, "step": 2078 }, { "epoch": 0.038002449412323835, "grad_norm": 8.69340557232809, "learning_rate": 9.998328682695391e-06, "loss": 18.5631, "step": 2079 }, { "epoch": 0.03802072860877036, "grad_norm": 7.546948153844951, "learning_rate": 9.998321020934607e-06, "loss": 18.0963, "step": 2080 }, { "epoch": 0.038039007805216886, "grad_norm": 8.297753147884801, "learning_rate": 9.998313341655142e-06, "loss": 18.312, "step": 2081 }, { "epoch": 0.03805728700166341, "grad_norm": 6.922271759890242, "learning_rate": 9.99830564485702e-06, "loss": 17.6805, "step": 2082 }, { "epoch": 0.03807556619810993, "grad_norm": 7.008500913491092, "learning_rate": 9.998297930540273e-06, "loss": 17.6757, "step": 2083 }, { "epoch": 0.03809384539455646, "grad_norm": 9.141224497140042, "learning_rate": 9.998290198704924e-06, "loss": 18.5254, "step": 2084 }, { "epoch": 0.03811212459100298, "grad_norm": 6.172946546054869, "learning_rate": 9.998282449351002e-06, "loss": 17.2599, "step": 2085 }, { "epoch": 0.0381304037874495, "grad_norm": 8.891334316381819, "learning_rate": 9.998274682478535e-06, "loss": 18.6278, "step": 2086 }, { "epoch": 0.03814868298389603, "grad_norm": 8.313104732808613, "learning_rate": 9.998266898087546e-06, "loss": 18.0706, "step": 2087 }, { "epoch": 0.03816696218034255, "grad_norm": 7.617135262532365, "learning_rate": 9.998259096178067e-06, "loss": 18.0323, "step": 2088 }, { "epoch": 0.038185241376789074, "grad_norm": 7.312424259446676, "learning_rate": 9.998251276750124e-06, "loss": 17.57, "step": 2089 }, { "epoch": 0.0382035205732356, "grad_norm": 7.557625541284032, "learning_rate": 9.998243439803743e-06, "loss": 17.8035, "step": 2090 }, { "epoch": 0.038221799769682124, "grad_norm": 8.286744336007226, "learning_rate": 9.998235585338953e-06, "loss": 18.3543, "step": 2091 }, { "epoch": 0.038240078966128646, "grad_norm": 8.51951773168702, "learning_rate": 9.998227713355782e-06, "loss": 18.3313, "step": 2092 }, { "epoch": 0.038258358162575175, "grad_norm": 7.784579198221027, "learning_rate": 9.998219823854255e-06, "loss": 18.3275, "step": 2093 }, { "epoch": 0.0382766373590217, "grad_norm": 6.878269528459254, "learning_rate": 9.998211916834402e-06, "loss": 17.506, "step": 2094 }, { "epoch": 0.03829491655546822, "grad_norm": 6.077809690735215, "learning_rate": 9.99820399229625e-06, "loss": 17.1299, "step": 2095 }, { "epoch": 0.03831319575191475, "grad_norm": 7.245859913601165, "learning_rate": 9.998196050239827e-06, "loss": 17.6635, "step": 2096 }, { "epoch": 0.03833147494836127, "grad_norm": 7.879792392452729, "learning_rate": 9.998188090665159e-06, "loss": 17.8758, "step": 2097 }, { "epoch": 0.0383497541448078, "grad_norm": 6.97668441078997, "learning_rate": 9.998180113572277e-06, "loss": 17.6424, "step": 2098 }, { "epoch": 0.03836803334125432, "grad_norm": 7.754336743164202, "learning_rate": 9.998172118961207e-06, "loss": 18.2514, "step": 2099 }, { "epoch": 0.03838631253770084, "grad_norm": 8.158229133475526, "learning_rate": 9.998164106831978e-06, "loss": 18.0946, "step": 2100 }, { "epoch": 0.03840459173414737, "grad_norm": 7.938238548236804, "learning_rate": 9.998156077184617e-06, "loss": 18.078, "step": 2101 }, { "epoch": 0.03842287093059389, "grad_norm": 8.48571756895092, "learning_rate": 9.998148030019152e-06, "loss": 18.186, "step": 2102 }, { "epoch": 0.03844115012704041, "grad_norm": 6.878298713002423, "learning_rate": 9.998139965335613e-06, "loss": 17.4737, "step": 2103 }, { "epoch": 0.03845942932348694, "grad_norm": 8.569729559872155, "learning_rate": 9.998131883134028e-06, "loss": 18.0949, "step": 2104 }, { "epoch": 0.038477708519933464, "grad_norm": 7.296476454258555, "learning_rate": 9.998123783414421e-06, "loss": 18.0296, "step": 2105 }, { "epoch": 0.038495987716379985, "grad_norm": 8.22560600922055, "learning_rate": 9.998115666176828e-06, "loss": 17.8063, "step": 2106 }, { "epoch": 0.038514266912826514, "grad_norm": 8.49243278887264, "learning_rate": 9.99810753142127e-06, "loss": 18.2786, "step": 2107 }, { "epoch": 0.038532546109273036, "grad_norm": 7.4911301907547685, "learning_rate": 9.99809937914778e-06, "loss": 17.9241, "step": 2108 }, { "epoch": 0.03855082530571956, "grad_norm": 8.29966767865699, "learning_rate": 9.998091209356387e-06, "loss": 18.4018, "step": 2109 }, { "epoch": 0.038569104502166086, "grad_norm": 7.275110598935181, "learning_rate": 9.998083022047116e-06, "loss": 17.7244, "step": 2110 }, { "epoch": 0.03858738369861261, "grad_norm": 6.894500030672901, "learning_rate": 9.998074817219999e-06, "loss": 17.4782, "step": 2111 }, { "epoch": 0.03860566289505913, "grad_norm": 6.973114564655955, "learning_rate": 9.998066594875063e-06, "loss": 17.6483, "step": 2112 }, { "epoch": 0.03862394209150566, "grad_norm": 7.8712395141148495, "learning_rate": 9.998058355012337e-06, "loss": 18.2726, "step": 2113 }, { "epoch": 0.03864222128795218, "grad_norm": 7.846501033939585, "learning_rate": 9.99805009763185e-06, "loss": 17.8641, "step": 2114 }, { "epoch": 0.03866050048439871, "grad_norm": 7.796771949115895, "learning_rate": 9.99804182273363e-06, "loss": 18.0188, "step": 2115 }, { "epoch": 0.03867877968084523, "grad_norm": 6.346450837225925, "learning_rate": 9.99803353031771e-06, "loss": 17.2014, "step": 2116 }, { "epoch": 0.03869705887729175, "grad_norm": 9.008885742021926, "learning_rate": 9.998025220384114e-06, "loss": 18.5797, "step": 2117 }, { "epoch": 0.03871533807373828, "grad_norm": 6.600728835551202, "learning_rate": 9.998016892932873e-06, "loss": 17.3858, "step": 2118 }, { "epoch": 0.0387336172701848, "grad_norm": 7.9493070426391075, "learning_rate": 9.998008547964018e-06, "loss": 17.9694, "step": 2119 }, { "epoch": 0.038751896466631325, "grad_norm": 8.29793855732644, "learning_rate": 9.998000185477576e-06, "loss": 18.1664, "step": 2120 }, { "epoch": 0.038770175663077854, "grad_norm": 8.018611705768237, "learning_rate": 9.997991805473577e-06, "loss": 18.1572, "step": 2121 }, { "epoch": 0.038788454859524375, "grad_norm": 7.7486960969514245, "learning_rate": 9.997983407952052e-06, "loss": 17.7031, "step": 2122 }, { "epoch": 0.0388067340559709, "grad_norm": 7.005074737558086, "learning_rate": 9.997974992913026e-06, "loss": 17.7374, "step": 2123 }, { "epoch": 0.038825013252417426, "grad_norm": 8.463804697132712, "learning_rate": 9.997966560356534e-06, "loss": 18.3967, "step": 2124 }, { "epoch": 0.03884329244886395, "grad_norm": 8.827072480057215, "learning_rate": 9.997958110282602e-06, "loss": 18.3065, "step": 2125 }, { "epoch": 0.03886157164531047, "grad_norm": 8.028287371587094, "learning_rate": 9.99794964269126e-06, "loss": 18.1977, "step": 2126 }, { "epoch": 0.038879850841757, "grad_norm": 7.609837991470296, "learning_rate": 9.997941157582538e-06, "loss": 17.8667, "step": 2127 }, { "epoch": 0.03889813003820352, "grad_norm": 7.609650111579857, "learning_rate": 9.997932654956467e-06, "loss": 17.8524, "step": 2128 }, { "epoch": 0.03891640923465004, "grad_norm": 6.3001475843222075, "learning_rate": 9.997924134813075e-06, "loss": 17.3262, "step": 2129 }, { "epoch": 0.03893468843109657, "grad_norm": 8.218052885471584, "learning_rate": 9.997915597152394e-06, "loss": 18.016, "step": 2130 }, { "epoch": 0.03895296762754309, "grad_norm": 8.96614079578861, "learning_rate": 9.99790704197445e-06, "loss": 18.2741, "step": 2131 }, { "epoch": 0.03897124682398962, "grad_norm": 7.338381226610959, "learning_rate": 9.997898469279278e-06, "loss": 17.5664, "step": 2132 }, { "epoch": 0.03898952602043614, "grad_norm": 7.397884465912606, "learning_rate": 9.997889879066904e-06, "loss": 18.05, "step": 2133 }, { "epoch": 0.039007805216882664, "grad_norm": 6.351306050693116, "learning_rate": 9.99788127133736e-06, "loss": 17.3067, "step": 2134 }, { "epoch": 0.03902608441332919, "grad_norm": 8.342880151387575, "learning_rate": 9.997872646090675e-06, "loss": 18.4756, "step": 2135 }, { "epoch": 0.039044363609775715, "grad_norm": 8.520078110217645, "learning_rate": 9.997864003326882e-06, "loss": 18.5616, "step": 2136 }, { "epoch": 0.039062642806222236, "grad_norm": 7.15659043512593, "learning_rate": 9.997855343046007e-06, "loss": 17.5117, "step": 2137 }, { "epoch": 0.039080922002668765, "grad_norm": 6.854967211447595, "learning_rate": 9.997846665248086e-06, "loss": 17.6334, "step": 2138 }, { "epoch": 0.03909920119911529, "grad_norm": 7.0789309063840165, "learning_rate": 9.997837969933144e-06, "loss": 17.7366, "step": 2139 }, { "epoch": 0.03911748039556181, "grad_norm": 9.024590484928225, "learning_rate": 9.997829257101214e-06, "loss": 18.69, "step": 2140 }, { "epoch": 0.03913575959200834, "grad_norm": 8.416640311669644, "learning_rate": 9.997820526752327e-06, "loss": 17.8544, "step": 2141 }, { "epoch": 0.03915403878845486, "grad_norm": 8.718851178261065, "learning_rate": 9.99781177888651e-06, "loss": 18.3247, "step": 2142 }, { "epoch": 0.03917231798490138, "grad_norm": 8.160393105502534, "learning_rate": 9.9978030135038e-06, "loss": 17.906, "step": 2143 }, { "epoch": 0.03919059718134791, "grad_norm": 7.707170090801509, "learning_rate": 9.997794230604221e-06, "loss": 17.9793, "step": 2144 }, { "epoch": 0.03920887637779443, "grad_norm": 6.371815042310033, "learning_rate": 9.997785430187808e-06, "loss": 17.5896, "step": 2145 }, { "epoch": 0.03922715557424095, "grad_norm": 6.710940234620757, "learning_rate": 9.99777661225459e-06, "loss": 17.4544, "step": 2146 }, { "epoch": 0.03924543477068748, "grad_norm": 7.709280379495519, "learning_rate": 9.997767776804601e-06, "loss": 18.0954, "step": 2147 }, { "epoch": 0.039263713967134004, "grad_norm": 7.760644279078112, "learning_rate": 9.997758923837868e-06, "loss": 18.0527, "step": 2148 }, { "epoch": 0.03928199316358053, "grad_norm": 7.049552993140452, "learning_rate": 9.997750053354425e-06, "loss": 17.6109, "step": 2149 }, { "epoch": 0.039300272360027054, "grad_norm": 7.566624110683898, "learning_rate": 9.9977411653543e-06, "loss": 18.1585, "step": 2150 }, { "epoch": 0.039318551556473576, "grad_norm": 7.097300511901119, "learning_rate": 9.997732259837528e-06, "loss": 17.6664, "step": 2151 }, { "epoch": 0.039336830752920104, "grad_norm": 7.893771154913385, "learning_rate": 9.997723336804134e-06, "loss": 18.1228, "step": 2152 }, { "epoch": 0.039355109949366626, "grad_norm": 7.819552400648903, "learning_rate": 9.997714396254157e-06, "loss": 17.9937, "step": 2153 }, { "epoch": 0.03937338914581315, "grad_norm": 7.864297066489012, "learning_rate": 9.997705438187624e-06, "loss": 17.902, "step": 2154 }, { "epoch": 0.03939166834225968, "grad_norm": 7.733725836401352, "learning_rate": 9.997696462604567e-06, "loss": 17.6863, "step": 2155 }, { "epoch": 0.0394099475387062, "grad_norm": 8.522942184970894, "learning_rate": 9.997687469505018e-06, "loss": 18.3173, "step": 2156 }, { "epoch": 0.03942822673515272, "grad_norm": 8.175017067967467, "learning_rate": 9.997678458889006e-06, "loss": 17.6859, "step": 2157 }, { "epoch": 0.03944650593159925, "grad_norm": 8.978411114997986, "learning_rate": 9.997669430756567e-06, "loss": 19.04, "step": 2158 }, { "epoch": 0.03946478512804577, "grad_norm": 7.482174928325935, "learning_rate": 9.99766038510773e-06, "loss": 17.8164, "step": 2159 }, { "epoch": 0.03948306432449229, "grad_norm": 7.921072208230814, "learning_rate": 9.997651321942526e-06, "loss": 18.1876, "step": 2160 }, { "epoch": 0.03950134352093882, "grad_norm": 7.17372865010213, "learning_rate": 9.997642241260988e-06, "loss": 17.7621, "step": 2161 }, { "epoch": 0.03951962271738534, "grad_norm": 7.3970388554120605, "learning_rate": 9.997633143063147e-06, "loss": 17.8985, "step": 2162 }, { "epoch": 0.039537901913831865, "grad_norm": 8.093249250600254, "learning_rate": 9.997624027349038e-06, "loss": 18.1239, "step": 2163 }, { "epoch": 0.03955618111027839, "grad_norm": 9.891216885761784, "learning_rate": 9.99761489411869e-06, "loss": 18.365, "step": 2164 }, { "epoch": 0.039574460306724915, "grad_norm": 8.247118588942913, "learning_rate": 9.997605743372135e-06, "loss": 17.8682, "step": 2165 }, { "epoch": 0.039592739503171444, "grad_norm": 8.006775421381619, "learning_rate": 9.997596575109403e-06, "loss": 18.0026, "step": 2166 }, { "epoch": 0.039611018699617966, "grad_norm": 8.080014015242162, "learning_rate": 9.99758738933053e-06, "loss": 18.2187, "step": 2167 }, { "epoch": 0.03962929789606449, "grad_norm": 8.059337299934262, "learning_rate": 9.997578186035548e-06, "loss": 18.2853, "step": 2168 }, { "epoch": 0.039647577092511016, "grad_norm": 8.587820665517523, "learning_rate": 9.997568965224489e-06, "loss": 18.5507, "step": 2169 }, { "epoch": 0.03966585628895754, "grad_norm": 7.372263721174855, "learning_rate": 9.997559726897382e-06, "loss": 18.0406, "step": 2170 }, { "epoch": 0.03968413548540406, "grad_norm": 6.542605135963088, "learning_rate": 9.997550471054262e-06, "loss": 17.5526, "step": 2171 }, { "epoch": 0.03970241468185059, "grad_norm": 7.189110663432018, "learning_rate": 9.997541197695165e-06, "loss": 17.8062, "step": 2172 }, { "epoch": 0.03972069387829711, "grad_norm": 6.404821137499881, "learning_rate": 9.997531906820114e-06, "loss": 17.2803, "step": 2173 }, { "epoch": 0.03973897307474363, "grad_norm": 8.369116003250504, "learning_rate": 9.997522598429152e-06, "loss": 18.1112, "step": 2174 }, { "epoch": 0.03975725227119016, "grad_norm": 8.105760046900059, "learning_rate": 9.997513272522306e-06, "loss": 18.1757, "step": 2175 }, { "epoch": 0.03977553146763668, "grad_norm": 6.8914560879095, "learning_rate": 9.997503929099608e-06, "loss": 17.6367, "step": 2176 }, { "epoch": 0.039793810664083204, "grad_norm": 8.74928482923437, "learning_rate": 9.997494568161094e-06, "loss": 18.797, "step": 2177 }, { "epoch": 0.03981208986052973, "grad_norm": 7.606032195700107, "learning_rate": 9.997485189706794e-06, "loss": 18.0399, "step": 2178 }, { "epoch": 0.039830369056976254, "grad_norm": 7.957640315235665, "learning_rate": 9.997475793736742e-06, "loss": 18.1217, "step": 2179 }, { "epoch": 0.039848648253422776, "grad_norm": 8.172829907955222, "learning_rate": 9.997466380250972e-06, "loss": 17.9818, "step": 2180 }, { "epoch": 0.039866927449869305, "grad_norm": 8.639348016920609, "learning_rate": 9.997456949249516e-06, "loss": 18.3638, "step": 2181 }, { "epoch": 0.03988520664631583, "grad_norm": 8.28297815129272, "learning_rate": 9.997447500732408e-06, "loss": 18.0711, "step": 2182 }, { "epoch": 0.039903485842762355, "grad_norm": 7.301929917593747, "learning_rate": 9.997438034699676e-06, "loss": 17.9614, "step": 2183 }, { "epoch": 0.03992176503920888, "grad_norm": 9.2133706103282, "learning_rate": 9.99742855115136e-06, "loss": 18.3219, "step": 2184 }, { "epoch": 0.0399400442356554, "grad_norm": 7.6662851379575, "learning_rate": 9.997419050087491e-06, "loss": 18.0362, "step": 2185 }, { "epoch": 0.03995832343210193, "grad_norm": 9.40252182257759, "learning_rate": 9.997409531508102e-06, "loss": 18.7361, "step": 2186 }, { "epoch": 0.03997660262854845, "grad_norm": 7.715201964930718, "learning_rate": 9.997399995413225e-06, "loss": 18.0677, "step": 2187 }, { "epoch": 0.03999488182499497, "grad_norm": 8.180109037376967, "learning_rate": 9.997390441802896e-06, "loss": 17.8739, "step": 2188 }, { "epoch": 0.0400131610214415, "grad_norm": 7.474350082348093, "learning_rate": 9.997380870677147e-06, "loss": 17.736, "step": 2189 }, { "epoch": 0.04003144021788802, "grad_norm": 7.30539824775302, "learning_rate": 9.997371282036012e-06, "loss": 17.7862, "step": 2190 }, { "epoch": 0.04004971941433454, "grad_norm": 8.10049297693191, "learning_rate": 9.997361675879524e-06, "loss": 18.3183, "step": 2191 }, { "epoch": 0.04006799861078107, "grad_norm": 8.148440778879321, "learning_rate": 9.997352052207717e-06, "loss": 17.8117, "step": 2192 }, { "epoch": 0.040086277807227594, "grad_norm": 7.662804144562566, "learning_rate": 9.997342411020623e-06, "loss": 17.9414, "step": 2193 }, { "epoch": 0.040104557003674116, "grad_norm": 8.657393083701246, "learning_rate": 9.99733275231828e-06, "loss": 18.4035, "step": 2194 }, { "epoch": 0.040122836200120644, "grad_norm": 6.363443744243816, "learning_rate": 9.997323076100718e-06, "loss": 17.273, "step": 2195 }, { "epoch": 0.040141115396567166, "grad_norm": 8.061330105461877, "learning_rate": 9.997313382367973e-06, "loss": 18.3943, "step": 2196 }, { "epoch": 0.04015939459301369, "grad_norm": 7.726391277553473, "learning_rate": 9.997303671120077e-06, "loss": 17.7805, "step": 2197 }, { "epoch": 0.040177673789460217, "grad_norm": 8.212107448574407, "learning_rate": 9.997293942357065e-06, "loss": 17.96, "step": 2198 }, { "epoch": 0.04019595298590674, "grad_norm": 8.290238516875284, "learning_rate": 9.997284196078974e-06, "loss": 17.9312, "step": 2199 }, { "epoch": 0.04021423218235327, "grad_norm": 7.98989253657042, "learning_rate": 9.997274432285833e-06, "loss": 17.9388, "step": 2200 }, { "epoch": 0.04023251137879979, "grad_norm": 6.8044758666856815, "learning_rate": 9.997264650977681e-06, "loss": 17.4357, "step": 2201 }, { "epoch": 0.04025079057524631, "grad_norm": 8.221859777178526, "learning_rate": 9.997254852154548e-06, "loss": 18.1218, "step": 2202 }, { "epoch": 0.04026906977169284, "grad_norm": 8.345525451984651, "learning_rate": 9.997245035816471e-06, "loss": 18.1535, "step": 2203 }, { "epoch": 0.04028734896813936, "grad_norm": 7.8169540138706335, "learning_rate": 9.997235201963484e-06, "loss": 17.9238, "step": 2204 }, { "epoch": 0.04030562816458588, "grad_norm": 7.951482942811827, "learning_rate": 9.99722535059562e-06, "loss": 17.6738, "step": 2205 }, { "epoch": 0.04032390736103241, "grad_norm": 7.5674725394502, "learning_rate": 9.997215481712917e-06, "loss": 17.9341, "step": 2206 }, { "epoch": 0.04034218655747893, "grad_norm": 8.671669698506888, "learning_rate": 9.997205595315406e-06, "loss": 18.5597, "step": 2207 }, { "epoch": 0.040360465753925455, "grad_norm": 6.815993343653375, "learning_rate": 9.997195691403123e-06, "loss": 17.3178, "step": 2208 }, { "epoch": 0.040378744950371984, "grad_norm": 8.389172345015542, "learning_rate": 9.997185769976104e-06, "loss": 18.2111, "step": 2209 }, { "epoch": 0.040397024146818505, "grad_norm": 7.831804934723512, "learning_rate": 9.997175831034382e-06, "loss": 18.0893, "step": 2210 }, { "epoch": 0.04041530334326503, "grad_norm": 7.819787617633279, "learning_rate": 9.99716587457799e-06, "loss": 17.8114, "step": 2211 }, { "epoch": 0.040433582539711556, "grad_norm": 7.213440914722836, "learning_rate": 9.997155900606968e-06, "loss": 17.5792, "step": 2212 }, { "epoch": 0.04045186173615808, "grad_norm": 7.467749826895467, "learning_rate": 9.99714590912135e-06, "loss": 18.1147, "step": 2213 }, { "epoch": 0.0404701409326046, "grad_norm": 7.3030276478195715, "learning_rate": 9.997135900121164e-06, "loss": 17.7149, "step": 2214 }, { "epoch": 0.04048842012905113, "grad_norm": 9.368146596367051, "learning_rate": 9.997125873606452e-06, "loss": 18.43, "step": 2215 }, { "epoch": 0.04050669932549765, "grad_norm": 7.54753489549492, "learning_rate": 9.99711582957725e-06, "loss": 17.8491, "step": 2216 }, { "epoch": 0.04052497852194418, "grad_norm": 7.018911450745933, "learning_rate": 9.997105768033588e-06, "loss": 17.5845, "step": 2217 }, { "epoch": 0.0405432577183907, "grad_norm": 8.017074287783036, "learning_rate": 9.997095688975506e-06, "loss": 18.0492, "step": 2218 }, { "epoch": 0.04056153691483722, "grad_norm": 6.573016273674605, "learning_rate": 9.997085592403036e-06, "loss": 17.4416, "step": 2219 }, { "epoch": 0.04057981611128375, "grad_norm": 7.165610220362177, "learning_rate": 9.997075478316213e-06, "loss": 17.748, "step": 2220 }, { "epoch": 0.04059809530773027, "grad_norm": 8.116293660856455, "learning_rate": 9.997065346715079e-06, "loss": 18.1911, "step": 2221 }, { "epoch": 0.040616374504176794, "grad_norm": 8.492003188224011, "learning_rate": 9.99705519759966e-06, "loss": 18.0617, "step": 2222 }, { "epoch": 0.04063465370062332, "grad_norm": 6.45728722483106, "learning_rate": 9.997045030969997e-06, "loss": 17.3788, "step": 2223 }, { "epoch": 0.040652932897069845, "grad_norm": 9.357708050753015, "learning_rate": 9.997034846826126e-06, "loss": 18.6276, "step": 2224 }, { "epoch": 0.040671212093516367, "grad_norm": 8.108005144604292, "learning_rate": 9.99702464516808e-06, "loss": 18.2943, "step": 2225 }, { "epoch": 0.040689491289962895, "grad_norm": 7.856412638551117, "learning_rate": 9.997014425995898e-06, "loss": 18.0768, "step": 2226 }, { "epoch": 0.04070777048640942, "grad_norm": 7.9677426244573795, "learning_rate": 9.997004189309614e-06, "loss": 17.9339, "step": 2227 }, { "epoch": 0.04072604968285594, "grad_norm": 8.68121235006211, "learning_rate": 9.996993935109263e-06, "loss": 18.3509, "step": 2228 }, { "epoch": 0.04074432887930247, "grad_norm": 9.159087383676308, "learning_rate": 9.99698366339488e-06, "loss": 18.6396, "step": 2229 }, { "epoch": 0.04076260807574899, "grad_norm": 9.129100089272592, "learning_rate": 9.996973374166505e-06, "loss": 18.3589, "step": 2230 }, { "epoch": 0.04078088727219551, "grad_norm": 7.63200488625213, "learning_rate": 9.996963067424173e-06, "loss": 17.8978, "step": 2231 }, { "epoch": 0.04079916646864204, "grad_norm": 7.037195597311852, "learning_rate": 9.996952743167919e-06, "loss": 17.9303, "step": 2232 }, { "epoch": 0.04081744566508856, "grad_norm": 6.697245553985456, "learning_rate": 9.996942401397776e-06, "loss": 17.2696, "step": 2233 }, { "epoch": 0.04083572486153509, "grad_norm": 8.364622068437564, "learning_rate": 9.996932042113785e-06, "loss": 18.2497, "step": 2234 }, { "epoch": 0.04085400405798161, "grad_norm": 7.55560813284806, "learning_rate": 9.996921665315982e-06, "loss": 17.7116, "step": 2235 }, { "epoch": 0.040872283254428134, "grad_norm": 7.189021265997685, "learning_rate": 9.996911271004403e-06, "loss": 17.7219, "step": 2236 }, { "epoch": 0.04089056245087466, "grad_norm": 8.405771124735566, "learning_rate": 9.996900859179082e-06, "loss": 18.4455, "step": 2237 }, { "epoch": 0.040908841647321184, "grad_norm": 8.110808715552439, "learning_rate": 9.996890429840057e-06, "loss": 18.0719, "step": 2238 }, { "epoch": 0.040927120843767706, "grad_norm": 6.7657326654624805, "learning_rate": 9.996879982987365e-06, "loss": 17.6897, "step": 2239 }, { "epoch": 0.040945400040214235, "grad_norm": 7.902971602385438, "learning_rate": 9.996869518621043e-06, "loss": 17.916, "step": 2240 }, { "epoch": 0.040963679236660756, "grad_norm": 8.739284291169165, "learning_rate": 9.996859036741125e-06, "loss": 18.4968, "step": 2241 }, { "epoch": 0.04098195843310728, "grad_norm": 8.863783295034544, "learning_rate": 9.996848537347651e-06, "loss": 18.4603, "step": 2242 }, { "epoch": 0.04100023762955381, "grad_norm": 8.827975485715196, "learning_rate": 9.996838020440656e-06, "loss": 18.1549, "step": 2243 }, { "epoch": 0.04101851682600033, "grad_norm": 7.242534711509926, "learning_rate": 9.996827486020178e-06, "loss": 18.0313, "step": 2244 }, { "epoch": 0.04103679602244685, "grad_norm": 8.24530816384671, "learning_rate": 9.996816934086253e-06, "loss": 18.1642, "step": 2245 }, { "epoch": 0.04105507521889338, "grad_norm": 8.827207274456681, "learning_rate": 9.996806364638917e-06, "loss": 17.7236, "step": 2246 }, { "epoch": 0.0410733544153399, "grad_norm": 7.681208695267572, "learning_rate": 9.996795777678212e-06, "loss": 18.0287, "step": 2247 }, { "epoch": 0.04109163361178642, "grad_norm": 7.7030414415403685, "learning_rate": 9.996785173204168e-06, "loss": 17.817, "step": 2248 }, { "epoch": 0.04110991280823295, "grad_norm": 7.760771743532918, "learning_rate": 9.996774551216825e-06, "loss": 17.6442, "step": 2249 }, { "epoch": 0.04112819200467947, "grad_norm": 7.646132278653959, "learning_rate": 9.996763911716223e-06, "loss": 17.8221, "step": 2250 }, { "epoch": 0.041146471201126, "grad_norm": 7.664331066563644, "learning_rate": 9.996753254702396e-06, "loss": 18.082, "step": 2251 }, { "epoch": 0.04116475039757252, "grad_norm": 8.051571433608776, "learning_rate": 9.996742580175383e-06, "loss": 17.8551, "step": 2252 }, { "epoch": 0.041183029594019045, "grad_norm": 9.186677857311098, "learning_rate": 9.996731888135221e-06, "loss": 18.7887, "step": 2253 }, { "epoch": 0.041201308790465574, "grad_norm": 9.121127798151235, "learning_rate": 9.996721178581948e-06, "loss": 18.4585, "step": 2254 }, { "epoch": 0.041219587986912096, "grad_norm": 7.671654595276185, "learning_rate": 9.9967104515156e-06, "loss": 17.8728, "step": 2255 }, { "epoch": 0.04123786718335862, "grad_norm": 6.970048735806223, "learning_rate": 9.996699706936214e-06, "loss": 17.661, "step": 2256 }, { "epoch": 0.041256146379805146, "grad_norm": 7.430312463763205, "learning_rate": 9.99668894484383e-06, "loss": 18.0237, "step": 2257 }, { "epoch": 0.04127442557625167, "grad_norm": 7.511826843925589, "learning_rate": 9.996678165238486e-06, "loss": 17.7627, "step": 2258 }, { "epoch": 0.04129270477269819, "grad_norm": 8.420303737893573, "learning_rate": 9.996667368120219e-06, "loss": 18.2609, "step": 2259 }, { "epoch": 0.04131098396914472, "grad_norm": 7.10238546153036, "learning_rate": 9.996656553489063e-06, "loss": 17.7902, "step": 2260 }, { "epoch": 0.04132926316559124, "grad_norm": 6.945695856385279, "learning_rate": 9.996645721345064e-06, "loss": 17.4524, "step": 2261 }, { "epoch": 0.04134754236203776, "grad_norm": 6.764161856334369, "learning_rate": 9.996634871688252e-06, "loss": 17.5518, "step": 2262 }, { "epoch": 0.04136582155848429, "grad_norm": 7.665182816750473, "learning_rate": 9.99662400451867e-06, "loss": 18.115, "step": 2263 }, { "epoch": 0.04138410075493081, "grad_norm": 8.059909396412177, "learning_rate": 9.996613119836354e-06, "loss": 18.2972, "step": 2264 }, { "epoch": 0.041402379951377334, "grad_norm": 8.569543981619749, "learning_rate": 9.996602217641342e-06, "loss": 18.4112, "step": 2265 }, { "epoch": 0.04142065914782386, "grad_norm": 7.7239610781494425, "learning_rate": 9.996591297933674e-06, "loss": 17.869, "step": 2266 }, { "epoch": 0.041438938344270385, "grad_norm": 9.416283894916795, "learning_rate": 9.996580360713386e-06, "loss": 18.6436, "step": 2267 }, { "epoch": 0.04145721754071691, "grad_norm": 7.5379396551312885, "learning_rate": 9.996569405980517e-06, "loss": 17.797, "step": 2268 }, { "epoch": 0.041475496737163435, "grad_norm": 7.955023806419039, "learning_rate": 9.996558433735106e-06, "loss": 17.8707, "step": 2269 }, { "epoch": 0.04149377593360996, "grad_norm": 8.470373947819228, "learning_rate": 9.996547443977193e-06, "loss": 18.0362, "step": 2270 }, { "epoch": 0.041512055130056486, "grad_norm": 8.130937681452403, "learning_rate": 9.996536436706815e-06, "loss": 17.9607, "step": 2271 }, { "epoch": 0.04153033432650301, "grad_norm": 9.185888336239445, "learning_rate": 9.996525411924008e-06, "loss": 18.8378, "step": 2272 }, { "epoch": 0.04154861352294953, "grad_norm": 7.703635781287982, "learning_rate": 9.996514369628813e-06, "loss": 17.9426, "step": 2273 }, { "epoch": 0.04156689271939606, "grad_norm": 8.686634293642456, "learning_rate": 9.99650330982127e-06, "loss": 18.4029, "step": 2274 }, { "epoch": 0.04158517191584258, "grad_norm": 7.505067804924687, "learning_rate": 9.996492232501416e-06, "loss": 17.9971, "step": 2275 }, { "epoch": 0.0416034511122891, "grad_norm": 8.266241750127806, "learning_rate": 9.996481137669291e-06, "loss": 18.1485, "step": 2276 }, { "epoch": 0.04162173030873563, "grad_norm": 6.9270516165961755, "learning_rate": 9.996470025324933e-06, "loss": 17.66, "step": 2277 }, { "epoch": 0.04164000950518215, "grad_norm": 8.25286348451379, "learning_rate": 9.99645889546838e-06, "loss": 18.0654, "step": 2278 }, { "epoch": 0.041658288701628673, "grad_norm": 7.159158430658167, "learning_rate": 9.996447748099673e-06, "loss": 17.8872, "step": 2279 }, { "epoch": 0.0416765678980752, "grad_norm": 6.995221413745061, "learning_rate": 9.996436583218852e-06, "loss": 17.4837, "step": 2280 }, { "epoch": 0.041694847094521724, "grad_norm": 6.775646813300009, "learning_rate": 9.996425400825952e-06, "loss": 17.4218, "step": 2281 }, { "epoch": 0.041713126290968246, "grad_norm": 6.985900307532805, "learning_rate": 9.996414200921014e-06, "loss": 17.6457, "step": 2282 }, { "epoch": 0.041731405487414774, "grad_norm": 6.908522988366118, "learning_rate": 9.996402983504079e-06, "loss": 17.5612, "step": 2283 }, { "epoch": 0.041749684683861296, "grad_norm": 8.094841746586784, "learning_rate": 9.996391748575184e-06, "loss": 18.0505, "step": 2284 }, { "epoch": 0.041767963880307825, "grad_norm": 7.070760098061046, "learning_rate": 9.996380496134372e-06, "loss": 17.9362, "step": 2285 }, { "epoch": 0.04178624307675435, "grad_norm": 8.978344297434703, "learning_rate": 9.996369226181678e-06, "loss": 18.6022, "step": 2286 }, { "epoch": 0.04180452227320087, "grad_norm": 9.632714862373605, "learning_rate": 9.996357938717144e-06, "loss": 18.3985, "step": 2287 }, { "epoch": 0.0418228014696474, "grad_norm": 8.054088454669444, "learning_rate": 9.996346633740809e-06, "loss": 18.1617, "step": 2288 }, { "epoch": 0.04184108066609392, "grad_norm": 8.577846213672203, "learning_rate": 9.996335311252712e-06, "loss": 18.3944, "step": 2289 }, { "epoch": 0.04185935986254044, "grad_norm": 6.67528905552185, "learning_rate": 9.996323971252895e-06, "loss": 17.6992, "step": 2290 }, { "epoch": 0.04187763905898697, "grad_norm": 8.042630167840407, "learning_rate": 9.996312613741394e-06, "loss": 18.6025, "step": 2291 }, { "epoch": 0.04189591825543349, "grad_norm": 7.633142920361117, "learning_rate": 9.996301238718251e-06, "loss": 18.0448, "step": 2292 }, { "epoch": 0.04191419745188001, "grad_norm": 8.627363254939468, "learning_rate": 9.996289846183506e-06, "loss": 18.4076, "step": 2293 }, { "epoch": 0.04193247664832654, "grad_norm": 6.953966791148869, "learning_rate": 9.9962784361372e-06, "loss": 17.5413, "step": 2294 }, { "epoch": 0.04195075584477306, "grad_norm": 8.570535225232478, "learning_rate": 9.99626700857937e-06, "loss": 18.5337, "step": 2295 }, { "epoch": 0.041969035041219585, "grad_norm": 7.60298214718083, "learning_rate": 9.996255563510059e-06, "loss": 17.8707, "step": 2296 }, { "epoch": 0.041987314237666114, "grad_norm": 8.817426993262535, "learning_rate": 9.996244100929305e-06, "loss": 18.5549, "step": 2297 }, { "epoch": 0.042005593434112636, "grad_norm": 8.051074390814165, "learning_rate": 9.99623262083715e-06, "loss": 18.0903, "step": 2298 }, { "epoch": 0.04202387263055916, "grad_norm": 6.914049677660477, "learning_rate": 9.996221123233631e-06, "loss": 17.652, "step": 2299 }, { "epoch": 0.042042151827005686, "grad_norm": 9.300933604045385, "learning_rate": 9.996209608118792e-06, "loss": 18.8076, "step": 2300 }, { "epoch": 0.04206043102345221, "grad_norm": 8.587086318109883, "learning_rate": 9.99619807549267e-06, "loss": 18.1632, "step": 2301 }, { "epoch": 0.042078710219898736, "grad_norm": 8.43519282951352, "learning_rate": 9.996186525355312e-06, "loss": 17.9335, "step": 2302 }, { "epoch": 0.04209698941634526, "grad_norm": 7.099712415454915, "learning_rate": 9.99617495770675e-06, "loss": 17.8879, "step": 2303 }, { "epoch": 0.04211526861279178, "grad_norm": 7.410752085634336, "learning_rate": 9.99616337254703e-06, "loss": 17.6165, "step": 2304 }, { "epoch": 0.04213354780923831, "grad_norm": 9.23145486560416, "learning_rate": 9.99615176987619e-06, "loss": 18.5385, "step": 2305 }, { "epoch": 0.04215182700568483, "grad_norm": 7.804166649928118, "learning_rate": 9.996140149694271e-06, "loss": 18.1071, "step": 2306 }, { "epoch": 0.04217010620213135, "grad_norm": 7.292123905585162, "learning_rate": 9.996128512001315e-06, "loss": 17.7422, "step": 2307 }, { "epoch": 0.04218838539857788, "grad_norm": 7.915688154902509, "learning_rate": 9.996116856797361e-06, "loss": 17.8606, "step": 2308 }, { "epoch": 0.0422066645950244, "grad_norm": 7.326183825319675, "learning_rate": 9.996105184082451e-06, "loss": 17.6888, "step": 2309 }, { "epoch": 0.042224943791470924, "grad_norm": 7.985166596550133, "learning_rate": 9.996093493856629e-06, "loss": 17.962, "step": 2310 }, { "epoch": 0.04224322298791745, "grad_norm": 8.03668699206688, "learning_rate": 9.996081786119932e-06, "loss": 18.0788, "step": 2311 }, { "epoch": 0.042261502184363975, "grad_norm": 7.6983489170030825, "learning_rate": 9.996070060872397e-06, "loss": 17.7543, "step": 2312 }, { "epoch": 0.0422797813808105, "grad_norm": 8.654245011201022, "learning_rate": 9.996058318114076e-06, "loss": 18.1858, "step": 2313 }, { "epoch": 0.042298060577257025, "grad_norm": 7.674808547428686, "learning_rate": 9.996046557845e-06, "loss": 17.8757, "step": 2314 }, { "epoch": 0.04231633977370355, "grad_norm": 7.9920460527959305, "learning_rate": 9.996034780065218e-06, "loss": 17.7592, "step": 2315 }, { "epoch": 0.04233461897015007, "grad_norm": 8.769256034609777, "learning_rate": 9.996022984774764e-06, "loss": 18.0602, "step": 2316 }, { "epoch": 0.0423528981665966, "grad_norm": 7.93222165293833, "learning_rate": 9.996011171973686e-06, "loss": 18.0744, "step": 2317 }, { "epoch": 0.04237117736304312, "grad_norm": 7.781045243938328, "learning_rate": 9.995999341662021e-06, "loss": 17.9564, "step": 2318 }, { "epoch": 0.04238945655948965, "grad_norm": 7.744615501885769, "learning_rate": 9.995987493839812e-06, "loss": 17.7038, "step": 2319 }, { "epoch": 0.04240773575593617, "grad_norm": 7.516691960954121, "learning_rate": 9.995975628507099e-06, "loss": 17.6575, "step": 2320 }, { "epoch": 0.04242601495238269, "grad_norm": 6.115400759637814, "learning_rate": 9.995963745663928e-06, "loss": 17.475, "step": 2321 }, { "epoch": 0.04244429414882922, "grad_norm": 7.22892817618116, "learning_rate": 9.995951845310334e-06, "loss": 17.5793, "step": 2322 }, { "epoch": 0.04246257334527574, "grad_norm": 8.062822196393663, "learning_rate": 9.995939927446366e-06, "loss": 18.0531, "step": 2323 }, { "epoch": 0.042480852541722264, "grad_norm": 7.660615550768664, "learning_rate": 9.995927992072058e-06, "loss": 17.7797, "step": 2324 }, { "epoch": 0.04249913173816879, "grad_norm": 7.982847840410579, "learning_rate": 9.995916039187458e-06, "loss": 18.2002, "step": 2325 }, { "epoch": 0.042517410934615314, "grad_norm": 8.785543921953549, "learning_rate": 9.995904068792607e-06, "loss": 18.5416, "step": 2326 }, { "epoch": 0.042535690131061836, "grad_norm": 7.097569729541003, "learning_rate": 9.995892080887545e-06, "loss": 17.6896, "step": 2327 }, { "epoch": 0.042553969327508365, "grad_norm": 8.176532027468705, "learning_rate": 9.995880075472315e-06, "loss": 18.2831, "step": 2328 }, { "epoch": 0.042572248523954886, "grad_norm": 7.885895150721695, "learning_rate": 9.995868052546957e-06, "loss": 17.9436, "step": 2329 }, { "epoch": 0.04259052772040141, "grad_norm": 6.775177913906577, "learning_rate": 9.995856012111517e-06, "loss": 17.3252, "step": 2330 }, { "epoch": 0.04260880691684794, "grad_norm": 7.94354048743229, "learning_rate": 9.995843954166036e-06, "loss": 17.9838, "step": 2331 }, { "epoch": 0.04262708611329446, "grad_norm": 7.764688523989066, "learning_rate": 9.995831878710553e-06, "loss": 17.9115, "step": 2332 }, { "epoch": 0.04264536530974098, "grad_norm": 7.409280094124277, "learning_rate": 9.995819785745113e-06, "loss": 17.729, "step": 2333 }, { "epoch": 0.04266364450618751, "grad_norm": 6.778335589173222, "learning_rate": 9.995807675269759e-06, "loss": 17.6421, "step": 2334 }, { "epoch": 0.04268192370263403, "grad_norm": 8.37586586816977, "learning_rate": 9.995795547284533e-06, "loss": 18.3069, "step": 2335 }, { "epoch": 0.04270020289908056, "grad_norm": 8.1383868168471, "learning_rate": 9.995783401789476e-06, "loss": 17.9825, "step": 2336 }, { "epoch": 0.04271848209552708, "grad_norm": 8.10989994308069, "learning_rate": 9.995771238784633e-06, "loss": 18.2562, "step": 2337 }, { "epoch": 0.0427367612919736, "grad_norm": 7.72664475872143, "learning_rate": 9.995759058270046e-06, "loss": 17.9149, "step": 2338 }, { "epoch": 0.04275504048842013, "grad_norm": 8.247015240804508, "learning_rate": 9.995746860245754e-06, "loss": 18.2216, "step": 2339 }, { "epoch": 0.042773319684866654, "grad_norm": 7.422653431185456, "learning_rate": 9.995734644711806e-06, "loss": 17.6408, "step": 2340 }, { "epoch": 0.042791598881313175, "grad_norm": 7.072127301139403, "learning_rate": 9.99572241166824e-06, "loss": 17.7117, "step": 2341 }, { "epoch": 0.042809878077759704, "grad_norm": 7.858912139734376, "learning_rate": 9.9957101611151e-06, "loss": 18.0305, "step": 2342 }, { "epoch": 0.042828157274206226, "grad_norm": 8.45495617543098, "learning_rate": 9.99569789305243e-06, "loss": 18.0969, "step": 2343 }, { "epoch": 0.04284643647065275, "grad_norm": 8.59786241900796, "learning_rate": 9.995685607480272e-06, "loss": 18.3043, "step": 2344 }, { "epoch": 0.042864715667099276, "grad_norm": 7.112209556935003, "learning_rate": 9.99567330439867e-06, "loss": 17.716, "step": 2345 }, { "epoch": 0.0428829948635458, "grad_norm": 7.4017512032226565, "learning_rate": 9.995660983807667e-06, "loss": 17.7608, "step": 2346 }, { "epoch": 0.04290127405999232, "grad_norm": 8.037185708433707, "learning_rate": 9.995648645707305e-06, "loss": 18.2411, "step": 2347 }, { "epoch": 0.04291955325643885, "grad_norm": 8.216894936542884, "learning_rate": 9.995636290097627e-06, "loss": 18.3719, "step": 2348 }, { "epoch": 0.04293783245288537, "grad_norm": 7.685407484316789, "learning_rate": 9.99562391697868e-06, "loss": 17.9064, "step": 2349 }, { "epoch": 0.04295611164933189, "grad_norm": 8.427764315977564, "learning_rate": 9.995611526350502e-06, "loss": 18.5452, "step": 2350 }, { "epoch": 0.04297439084577842, "grad_norm": 8.095761168162861, "learning_rate": 9.99559911821314e-06, "loss": 17.9802, "step": 2351 }, { "epoch": 0.04299267004222494, "grad_norm": 7.0316064907918445, "learning_rate": 9.995586692566636e-06, "loss": 17.8435, "step": 2352 }, { "epoch": 0.04301094923867147, "grad_norm": 7.839253788821011, "learning_rate": 9.995574249411035e-06, "loss": 17.8707, "step": 2353 }, { "epoch": 0.04302922843511799, "grad_norm": 7.38435360392192, "learning_rate": 9.99556178874638e-06, "loss": 17.7948, "step": 2354 }, { "epoch": 0.043047507631564515, "grad_norm": 7.911927060871132, "learning_rate": 9.995549310572714e-06, "loss": 18.0407, "step": 2355 }, { "epoch": 0.04306578682801104, "grad_norm": 7.234185652413484, "learning_rate": 9.995536814890081e-06, "loss": 17.8514, "step": 2356 }, { "epoch": 0.043084066024457565, "grad_norm": 7.87114672882479, "learning_rate": 9.995524301698525e-06, "loss": 17.9316, "step": 2357 }, { "epoch": 0.04310234522090409, "grad_norm": 6.701925047119131, "learning_rate": 9.995511770998089e-06, "loss": 17.5415, "step": 2358 }, { "epoch": 0.043120624417350616, "grad_norm": 7.237738766893789, "learning_rate": 9.99549922278882e-06, "loss": 17.8181, "step": 2359 }, { "epoch": 0.04313890361379714, "grad_norm": 8.366866841506923, "learning_rate": 9.995486657070758e-06, "loss": 18.3393, "step": 2360 }, { "epoch": 0.04315718281024366, "grad_norm": 8.159031238202944, "learning_rate": 9.99547407384395e-06, "loss": 18.1373, "step": 2361 }, { "epoch": 0.04317546200669019, "grad_norm": 9.519533536980633, "learning_rate": 9.99546147310844e-06, "loss": 19.0391, "step": 2362 }, { "epoch": 0.04319374120313671, "grad_norm": 7.203080784424371, "learning_rate": 9.995448854864267e-06, "loss": 17.6502, "step": 2363 }, { "epoch": 0.04321202039958323, "grad_norm": 7.785504030875423, "learning_rate": 9.99543621911148e-06, "loss": 18.0516, "step": 2364 }, { "epoch": 0.04323029959602976, "grad_norm": 7.418746620233205, "learning_rate": 9.995423565850125e-06, "loss": 17.7921, "step": 2365 }, { "epoch": 0.04324857879247628, "grad_norm": 6.901505175666439, "learning_rate": 9.995410895080242e-06, "loss": 17.5693, "step": 2366 }, { "epoch": 0.043266857988922804, "grad_norm": 7.735162149311585, "learning_rate": 9.995398206801878e-06, "loss": 17.93, "step": 2367 }, { "epoch": 0.04328513718536933, "grad_norm": 7.727478640094747, "learning_rate": 9.995385501015079e-06, "loss": 17.6187, "step": 2368 }, { "epoch": 0.043303416381815854, "grad_norm": 6.75990624060066, "learning_rate": 9.995372777719885e-06, "loss": 17.6831, "step": 2369 }, { "epoch": 0.04332169557826238, "grad_norm": 7.396815271837521, "learning_rate": 9.995360036916342e-06, "loss": 17.6261, "step": 2370 }, { "epoch": 0.043339974774708905, "grad_norm": 8.951021034027882, "learning_rate": 9.995347278604497e-06, "loss": 18.623, "step": 2371 }, { "epoch": 0.043358253971155426, "grad_norm": 7.478975138883387, "learning_rate": 9.995334502784392e-06, "loss": 17.8627, "step": 2372 }, { "epoch": 0.043376533167601955, "grad_norm": 7.210516005272581, "learning_rate": 9.995321709456074e-06, "loss": 17.6906, "step": 2373 }, { "epoch": 0.04339481236404848, "grad_norm": 8.089111675125123, "learning_rate": 9.995308898619587e-06, "loss": 17.8964, "step": 2374 }, { "epoch": 0.043413091560495, "grad_norm": 7.221725490241234, "learning_rate": 9.995296070274976e-06, "loss": 17.7029, "step": 2375 }, { "epoch": 0.04343137075694153, "grad_norm": 9.049432813964266, "learning_rate": 9.995283224422285e-06, "loss": 18.5432, "step": 2376 }, { "epoch": 0.04344964995338805, "grad_norm": 8.101618835608113, "learning_rate": 9.99527036106156e-06, "loss": 17.9293, "step": 2377 }, { "epoch": 0.04346792914983457, "grad_norm": 7.583140210100685, "learning_rate": 9.995257480192845e-06, "loss": 17.9971, "step": 2378 }, { "epoch": 0.0434862083462811, "grad_norm": 7.069738679577622, "learning_rate": 9.995244581816189e-06, "loss": 17.5991, "step": 2379 }, { "epoch": 0.04350448754272762, "grad_norm": 7.385570093641195, "learning_rate": 9.99523166593163e-06, "loss": 17.629, "step": 2380 }, { "epoch": 0.04352276673917414, "grad_norm": 7.691840493197007, "learning_rate": 9.99521873253922e-06, "loss": 17.9091, "step": 2381 }, { "epoch": 0.04354104593562067, "grad_norm": 8.304064472508092, "learning_rate": 9.995205781639001e-06, "loss": 18.22, "step": 2382 }, { "epoch": 0.04355932513206719, "grad_norm": 8.722371398782412, "learning_rate": 9.99519281323102e-06, "loss": 17.6181, "step": 2383 }, { "epoch": 0.043577604328513715, "grad_norm": 8.297329667723888, "learning_rate": 9.995179827315321e-06, "loss": 18.0572, "step": 2384 }, { "epoch": 0.043595883524960244, "grad_norm": 6.547568797748973, "learning_rate": 9.99516682389195e-06, "loss": 17.7223, "step": 2385 }, { "epoch": 0.043614162721406766, "grad_norm": 7.8858528862674016, "learning_rate": 9.995153802960952e-06, "loss": 18.2852, "step": 2386 }, { "epoch": 0.043632441917853294, "grad_norm": 7.346611184463637, "learning_rate": 9.995140764522377e-06, "loss": 17.7132, "step": 2387 }, { "epoch": 0.043650721114299816, "grad_norm": 7.232702754468648, "learning_rate": 9.995127708576265e-06, "loss": 17.7087, "step": 2388 }, { "epoch": 0.04366900031074634, "grad_norm": 7.8902377192310285, "learning_rate": 9.995114635122663e-06, "loss": 18.1182, "step": 2389 }, { "epoch": 0.04368727950719287, "grad_norm": 7.541585681087609, "learning_rate": 9.995101544161617e-06, "loss": 17.9787, "step": 2390 }, { "epoch": 0.04370555870363939, "grad_norm": 8.022977044455, "learning_rate": 9.995088435693174e-06, "loss": 18.0081, "step": 2391 }, { "epoch": 0.04372383790008591, "grad_norm": 9.649686114276376, "learning_rate": 9.995075309717382e-06, "loss": 18.4127, "step": 2392 }, { "epoch": 0.04374211709653244, "grad_norm": 9.461293606285105, "learning_rate": 9.995062166234281e-06, "loss": 18.5204, "step": 2393 }, { "epoch": 0.04376039629297896, "grad_norm": 7.099137168277874, "learning_rate": 9.995049005243922e-06, "loss": 18.0249, "step": 2394 }, { "epoch": 0.04377867548942548, "grad_norm": 7.888544589686886, "learning_rate": 9.995035826746351e-06, "loss": 18.1497, "step": 2395 }, { "epoch": 0.04379695468587201, "grad_norm": 8.461906939436192, "learning_rate": 9.99502263074161e-06, "loss": 17.9549, "step": 2396 }, { "epoch": 0.04381523388231853, "grad_norm": 8.681457125286627, "learning_rate": 9.99500941722975e-06, "loss": 18.5861, "step": 2397 }, { "epoch": 0.043833513078765055, "grad_norm": 7.982245776764294, "learning_rate": 9.994996186210818e-06, "loss": 17.9586, "step": 2398 }, { "epoch": 0.04385179227521158, "grad_norm": 6.70762930155129, "learning_rate": 9.994982937684854e-06, "loss": 17.6772, "step": 2399 }, { "epoch": 0.043870071471658105, "grad_norm": 7.625164098269424, "learning_rate": 9.994969671651908e-06, "loss": 18.307, "step": 2400 }, { "epoch": 0.04388835066810463, "grad_norm": 8.025152234520164, "learning_rate": 9.99495638811203e-06, "loss": 17.929, "step": 2401 }, { "epoch": 0.043906629864551155, "grad_norm": 7.188413988586524, "learning_rate": 9.99494308706526e-06, "loss": 17.5913, "step": 2402 }, { "epoch": 0.04392490906099768, "grad_norm": 7.566729513834296, "learning_rate": 9.99492976851165e-06, "loss": 18.0366, "step": 2403 }, { "epoch": 0.043943188257444206, "grad_norm": 7.22772701732747, "learning_rate": 9.994916432451245e-06, "loss": 17.6594, "step": 2404 }, { "epoch": 0.04396146745389073, "grad_norm": 8.347832930396576, "learning_rate": 9.99490307888409e-06, "loss": 18.0701, "step": 2405 }, { "epoch": 0.04397974665033725, "grad_norm": 7.283857657416851, "learning_rate": 9.994889707810234e-06, "loss": 17.6575, "step": 2406 }, { "epoch": 0.04399802584678378, "grad_norm": 6.820984119408071, "learning_rate": 9.99487631922972e-06, "loss": 17.2445, "step": 2407 }, { "epoch": 0.0440163050432303, "grad_norm": 7.403463314533847, "learning_rate": 9.994862913142602e-06, "loss": 18.2281, "step": 2408 }, { "epoch": 0.04403458423967682, "grad_norm": 7.991456749842262, "learning_rate": 9.994849489548921e-06, "loss": 18.057, "step": 2409 }, { "epoch": 0.04405286343612335, "grad_norm": 7.159303653286646, "learning_rate": 9.994836048448726e-06, "loss": 17.8026, "step": 2410 }, { "epoch": 0.04407114263256987, "grad_norm": 7.207852452805044, "learning_rate": 9.994822589842065e-06, "loss": 18.0369, "step": 2411 }, { "epoch": 0.044089421829016394, "grad_norm": 8.142630712018148, "learning_rate": 9.994809113728982e-06, "loss": 18.0258, "step": 2412 }, { "epoch": 0.04410770102546292, "grad_norm": 7.681017370419984, "learning_rate": 9.99479562010953e-06, "loss": 18.1285, "step": 2413 }, { "epoch": 0.044125980221909444, "grad_norm": 6.686508257741154, "learning_rate": 9.994782108983749e-06, "loss": 17.2982, "step": 2414 }, { "epoch": 0.044144259418355966, "grad_norm": 8.305098334149148, "learning_rate": 9.994768580351692e-06, "loss": 18.1322, "step": 2415 }, { "epoch": 0.044162538614802495, "grad_norm": 8.117129217177633, "learning_rate": 9.994755034213405e-06, "loss": 18.311, "step": 2416 }, { "epoch": 0.04418081781124902, "grad_norm": 7.380525097184161, "learning_rate": 9.994741470568937e-06, "loss": 17.8848, "step": 2417 }, { "epoch": 0.04419909700769554, "grad_norm": 7.3047276088671005, "learning_rate": 9.99472788941833e-06, "loss": 17.8573, "step": 2418 }, { "epoch": 0.04421737620414207, "grad_norm": 7.392153203517438, "learning_rate": 9.994714290761636e-06, "loss": 17.8191, "step": 2419 }, { "epoch": 0.04423565540058859, "grad_norm": 7.1426956309508896, "learning_rate": 9.994700674598902e-06, "loss": 17.4996, "step": 2420 }, { "epoch": 0.04425393459703512, "grad_norm": 8.566850220352459, "learning_rate": 9.994687040930176e-06, "loss": 17.8436, "step": 2421 }, { "epoch": 0.04427221379348164, "grad_norm": 9.009723701167259, "learning_rate": 9.994673389755504e-06, "loss": 18.4966, "step": 2422 }, { "epoch": 0.04429049298992816, "grad_norm": 7.648464419628126, "learning_rate": 9.994659721074937e-06, "loss": 17.5574, "step": 2423 }, { "epoch": 0.04430877218637469, "grad_norm": 6.899282785325929, "learning_rate": 9.99464603488852e-06, "loss": 17.5233, "step": 2424 }, { "epoch": 0.04432705138282121, "grad_norm": 7.419090401494869, "learning_rate": 9.994632331196303e-06, "loss": 17.6791, "step": 2425 }, { "epoch": 0.04434533057926773, "grad_norm": 7.856012202760776, "learning_rate": 9.994618609998333e-06, "loss": 17.9967, "step": 2426 }, { "epoch": 0.04436360977571426, "grad_norm": 7.899339983429512, "learning_rate": 9.994604871294658e-06, "loss": 18.1975, "step": 2427 }, { "epoch": 0.044381888972160784, "grad_norm": 7.842972356313221, "learning_rate": 9.994591115085324e-06, "loss": 18.1075, "step": 2428 }, { "epoch": 0.044400168168607305, "grad_norm": 8.647233203391728, "learning_rate": 9.994577341370384e-06, "loss": 18.4699, "step": 2429 }, { "epoch": 0.044418447365053834, "grad_norm": 8.191500746778265, "learning_rate": 9.994563550149884e-06, "loss": 18.016, "step": 2430 }, { "epoch": 0.044436726561500356, "grad_norm": 9.147080973632784, "learning_rate": 9.994549741423871e-06, "loss": 18.5194, "step": 2431 }, { "epoch": 0.04445500575794688, "grad_norm": 7.428488346981479, "learning_rate": 9.994535915192396e-06, "loss": 17.7912, "step": 2432 }, { "epoch": 0.044473284954393406, "grad_norm": 7.011024965235697, "learning_rate": 9.994522071455505e-06, "loss": 17.6894, "step": 2433 }, { "epoch": 0.04449156415083993, "grad_norm": 8.19376074623173, "learning_rate": 9.994508210213248e-06, "loss": 17.7326, "step": 2434 }, { "epoch": 0.04450984334728645, "grad_norm": 7.26414722027003, "learning_rate": 9.994494331465672e-06, "loss": 17.8731, "step": 2435 }, { "epoch": 0.04452812254373298, "grad_norm": 7.258459064949432, "learning_rate": 9.994480435212827e-06, "loss": 17.8062, "step": 2436 }, { "epoch": 0.0445464017401795, "grad_norm": 6.059524784370213, "learning_rate": 9.994466521454762e-06, "loss": 17.1452, "step": 2437 }, { "epoch": 0.04456468093662603, "grad_norm": 6.39379233735651, "learning_rate": 9.994452590191525e-06, "loss": 17.6071, "step": 2438 }, { "epoch": 0.04458296013307255, "grad_norm": 7.5348896456735615, "learning_rate": 9.994438641423166e-06, "loss": 18.2033, "step": 2439 }, { "epoch": 0.04460123932951907, "grad_norm": 6.765819290377159, "learning_rate": 9.994424675149733e-06, "loss": 17.7391, "step": 2440 }, { "epoch": 0.0446195185259656, "grad_norm": 8.045367878632184, "learning_rate": 9.994410691371274e-06, "loss": 18.264, "step": 2441 }, { "epoch": 0.04463779772241212, "grad_norm": 8.450665323988279, "learning_rate": 9.99439669008784e-06, "loss": 17.9337, "step": 2442 }, { "epoch": 0.044656076918858645, "grad_norm": 7.566173010961893, "learning_rate": 9.994382671299477e-06, "loss": 17.8623, "step": 2443 }, { "epoch": 0.044674356115305174, "grad_norm": 7.130095907079948, "learning_rate": 9.994368635006238e-06, "loss": 17.78, "step": 2444 }, { "epoch": 0.044692635311751695, "grad_norm": 7.27100666984323, "learning_rate": 9.99435458120817e-06, "loss": 17.8144, "step": 2445 }, { "epoch": 0.04471091450819822, "grad_norm": 7.489837115950441, "learning_rate": 9.994340509905321e-06, "loss": 18.0407, "step": 2446 }, { "epoch": 0.044729193704644746, "grad_norm": 7.612045002891485, "learning_rate": 9.994326421097744e-06, "loss": 18.1473, "step": 2447 }, { "epoch": 0.04474747290109127, "grad_norm": 7.642973655488191, "learning_rate": 9.994312314785486e-06, "loss": 18.1447, "step": 2448 }, { "epoch": 0.04476575209753779, "grad_norm": 6.7859223007249705, "learning_rate": 9.994298190968595e-06, "loss": 17.3404, "step": 2449 }, { "epoch": 0.04478403129398432, "grad_norm": 8.558894441486864, "learning_rate": 9.994284049647123e-06, "loss": 18.4606, "step": 2450 }, { "epoch": 0.04480231049043084, "grad_norm": 7.807862136108618, "learning_rate": 9.99426989082112e-06, "loss": 18.0718, "step": 2451 }, { "epoch": 0.04482058968687736, "grad_norm": 7.627670044603417, "learning_rate": 9.994255714490633e-06, "loss": 17.7779, "step": 2452 }, { "epoch": 0.04483886888332389, "grad_norm": 7.095676428920803, "learning_rate": 9.994241520655713e-06, "loss": 17.7749, "step": 2453 }, { "epoch": 0.04485714807977041, "grad_norm": 8.826666879389675, "learning_rate": 9.99422730931641e-06, "loss": 18.3998, "step": 2454 }, { "epoch": 0.04487542727621694, "grad_norm": 7.622749814638532, "learning_rate": 9.994213080472776e-06, "loss": 17.8817, "step": 2455 }, { "epoch": 0.04489370647266346, "grad_norm": 7.67749884483121, "learning_rate": 9.994198834124856e-06, "loss": 18.1708, "step": 2456 }, { "epoch": 0.044911985669109984, "grad_norm": 7.882270641978379, "learning_rate": 9.994184570272704e-06, "loss": 18.1039, "step": 2457 }, { "epoch": 0.04493026486555651, "grad_norm": 7.53739610912295, "learning_rate": 9.994170288916367e-06, "loss": 17.9699, "step": 2458 }, { "epoch": 0.044948544062003035, "grad_norm": 7.929458391148995, "learning_rate": 9.994155990055897e-06, "loss": 18.0522, "step": 2459 }, { "epoch": 0.044966823258449556, "grad_norm": 8.150928372404985, "learning_rate": 9.994141673691345e-06, "loss": 18.3842, "step": 2460 }, { "epoch": 0.044985102454896085, "grad_norm": 8.197250165025617, "learning_rate": 9.994127339822756e-06, "loss": 18.397, "step": 2461 }, { "epoch": 0.04500338165134261, "grad_norm": 7.0408254789572275, "learning_rate": 9.99411298845019e-06, "loss": 17.6103, "step": 2462 }, { "epoch": 0.04502166084778913, "grad_norm": 9.306036505095154, "learning_rate": 9.994098619573687e-06, "loss": 18.6617, "step": 2463 }, { "epoch": 0.04503994004423566, "grad_norm": 8.719533738544047, "learning_rate": 9.994084233193303e-06, "loss": 18.2127, "step": 2464 }, { "epoch": 0.04505821924068218, "grad_norm": 7.993337299543475, "learning_rate": 9.994069829309086e-06, "loss": 17.7749, "step": 2465 }, { "epoch": 0.0450764984371287, "grad_norm": 8.786320469873472, "learning_rate": 9.994055407921088e-06, "loss": 18.265, "step": 2466 }, { "epoch": 0.04509477763357523, "grad_norm": 8.088655571473474, "learning_rate": 9.99404096902936e-06, "loss": 18.0567, "step": 2467 }, { "epoch": 0.04511305683002175, "grad_norm": 10.785255980731446, "learning_rate": 9.99402651263395e-06, "loss": 18.8973, "step": 2468 }, { "epoch": 0.04513133602646827, "grad_norm": 6.947880353235863, "learning_rate": 9.994012038734912e-06, "loss": 17.7813, "step": 2469 }, { "epoch": 0.0451496152229148, "grad_norm": 8.40840965315626, "learning_rate": 9.993997547332295e-06, "loss": 18.0729, "step": 2470 }, { "epoch": 0.045167894419361324, "grad_norm": 7.141559747847017, "learning_rate": 9.993983038426149e-06, "loss": 17.7627, "step": 2471 }, { "epoch": 0.04518617361580785, "grad_norm": 8.083598178975093, "learning_rate": 9.993968512016528e-06, "loss": 18.0206, "step": 2472 }, { "epoch": 0.045204452812254374, "grad_norm": 7.941864410688481, "learning_rate": 9.993953968103479e-06, "loss": 17.7503, "step": 2473 }, { "epoch": 0.045222732008700896, "grad_norm": 8.276880607497981, "learning_rate": 9.993939406687055e-06, "loss": 18.1408, "step": 2474 }, { "epoch": 0.045241011205147424, "grad_norm": 7.494499518134069, "learning_rate": 9.993924827767306e-06, "loss": 17.717, "step": 2475 }, { "epoch": 0.045259290401593946, "grad_norm": 6.279519489638303, "learning_rate": 9.993910231344286e-06, "loss": 17.3121, "step": 2476 }, { "epoch": 0.04527756959804047, "grad_norm": 6.10448976291681, "learning_rate": 9.993895617418042e-06, "loss": 17.2826, "step": 2477 }, { "epoch": 0.045295848794487, "grad_norm": 8.485248276703604, "learning_rate": 9.993880985988629e-06, "loss": 18.2969, "step": 2478 }, { "epoch": 0.04531412799093352, "grad_norm": 6.956430929645586, "learning_rate": 9.993866337056095e-06, "loss": 17.6805, "step": 2479 }, { "epoch": 0.04533240718738004, "grad_norm": 7.577875930364536, "learning_rate": 9.993851670620494e-06, "loss": 17.957, "step": 2480 }, { "epoch": 0.04535068638382657, "grad_norm": 7.293365459040691, "learning_rate": 9.993836986681876e-06, "loss": 17.8977, "step": 2481 }, { "epoch": 0.04536896558027309, "grad_norm": 10.212845440385083, "learning_rate": 9.99382228524029e-06, "loss": 19.0827, "step": 2482 }, { "epoch": 0.04538724477671961, "grad_norm": 6.5640105097470505, "learning_rate": 9.993807566295793e-06, "loss": 17.4783, "step": 2483 }, { "epoch": 0.04540552397316614, "grad_norm": 7.88718427575359, "learning_rate": 9.993792829848433e-06, "loss": 17.909, "step": 2484 }, { "epoch": 0.04542380316961266, "grad_norm": 7.313171341929883, "learning_rate": 9.993778075898262e-06, "loss": 17.6938, "step": 2485 }, { "epoch": 0.045442082366059185, "grad_norm": 9.18976380206171, "learning_rate": 9.993763304445335e-06, "loss": 18.4729, "step": 2486 }, { "epoch": 0.04546036156250571, "grad_norm": 7.671585186971003, "learning_rate": 9.993748515489698e-06, "loss": 17.5632, "step": 2487 }, { "epoch": 0.045478640758952235, "grad_norm": 7.396646593191951, "learning_rate": 9.993733709031406e-06, "loss": 18.0158, "step": 2488 }, { "epoch": 0.045496919955398764, "grad_norm": 7.30338313789073, "learning_rate": 9.993718885070512e-06, "loss": 17.8184, "step": 2489 }, { "epoch": 0.045515199151845286, "grad_norm": 8.156573329646484, "learning_rate": 9.993704043607065e-06, "loss": 18.0749, "step": 2490 }, { "epoch": 0.04553347834829181, "grad_norm": 7.444852773785196, "learning_rate": 9.99368918464112e-06, "loss": 18.1151, "step": 2491 }, { "epoch": 0.045551757544738336, "grad_norm": 8.574863713539944, "learning_rate": 9.993674308172727e-06, "loss": 18.6178, "step": 2492 }, { "epoch": 0.04557003674118486, "grad_norm": 9.383076273075883, "learning_rate": 9.993659414201939e-06, "loss": 18.8024, "step": 2493 }, { "epoch": 0.04558831593763138, "grad_norm": 7.58726551821705, "learning_rate": 9.993644502728809e-06, "loss": 17.6787, "step": 2494 }, { "epoch": 0.04560659513407791, "grad_norm": 6.9906904839724175, "learning_rate": 9.993629573753387e-06, "loss": 17.6734, "step": 2495 }, { "epoch": 0.04562487433052443, "grad_norm": 7.602778161713747, "learning_rate": 9.993614627275728e-06, "loss": 18.0471, "step": 2496 }, { "epoch": 0.04564315352697095, "grad_norm": 6.743092129784845, "learning_rate": 9.993599663295883e-06, "loss": 17.7301, "step": 2497 }, { "epoch": 0.04566143272341748, "grad_norm": 6.698977437278408, "learning_rate": 9.993584681813904e-06, "loss": 17.4601, "step": 2498 }, { "epoch": 0.045679711919864, "grad_norm": 8.453281591316307, "learning_rate": 9.993569682829843e-06, "loss": 18.4345, "step": 2499 }, { "epoch": 0.045697991116310524, "grad_norm": 10.112200193544616, "learning_rate": 9.993554666343755e-06, "loss": 18.6083, "step": 2500 }, { "epoch": 0.04571627031275705, "grad_norm": 7.121410796053459, "learning_rate": 9.99353963235569e-06, "loss": 17.675, "step": 2501 }, { "epoch": 0.045734549509203574, "grad_norm": 7.625019759673931, "learning_rate": 9.993524580865704e-06, "loss": 17.769, "step": 2502 }, { "epoch": 0.045752828705650096, "grad_norm": 10.04571223137181, "learning_rate": 9.993509511873845e-06, "loss": 18.951, "step": 2503 }, { "epoch": 0.045771107902096625, "grad_norm": 8.376891770038641, "learning_rate": 9.99349442538017e-06, "loss": 18.0827, "step": 2504 }, { "epoch": 0.04578938709854315, "grad_norm": 7.862265599796088, "learning_rate": 9.99347932138473e-06, "loss": 17.9922, "step": 2505 }, { "epoch": 0.045807666294989675, "grad_norm": 8.124258938135004, "learning_rate": 9.993464199887578e-06, "loss": 18.638, "step": 2506 }, { "epoch": 0.0458259454914362, "grad_norm": 7.022653855067991, "learning_rate": 9.993449060888768e-06, "loss": 17.6937, "step": 2507 }, { "epoch": 0.04584422468788272, "grad_norm": 6.996402976687276, "learning_rate": 9.993433904388352e-06, "loss": 17.3463, "step": 2508 }, { "epoch": 0.04586250388432925, "grad_norm": 7.98813478789894, "learning_rate": 9.993418730386384e-06, "loss": 17.7135, "step": 2509 }, { "epoch": 0.04588078308077577, "grad_norm": 7.056808940985014, "learning_rate": 9.993403538882915e-06, "loss": 17.8142, "step": 2510 }, { "epoch": 0.04589906227722229, "grad_norm": 8.740012775396496, "learning_rate": 9.993388329878002e-06, "loss": 18.1227, "step": 2511 }, { "epoch": 0.04591734147366882, "grad_norm": 8.882864774096744, "learning_rate": 9.993373103371695e-06, "loss": 18.1953, "step": 2512 }, { "epoch": 0.04593562067011534, "grad_norm": 7.958453931071923, "learning_rate": 9.993357859364048e-06, "loss": 17.9278, "step": 2513 }, { "epoch": 0.04595389986656186, "grad_norm": 7.776292641995152, "learning_rate": 9.993342597855117e-06, "loss": 18.2712, "step": 2514 }, { "epoch": 0.04597217906300839, "grad_norm": 8.292059225346584, "learning_rate": 9.993327318844952e-06, "loss": 18.0476, "step": 2515 }, { "epoch": 0.045990458259454914, "grad_norm": 8.386415553531005, "learning_rate": 9.993312022333608e-06, "loss": 18.35, "step": 2516 }, { "epoch": 0.046008737455901436, "grad_norm": 8.045486169098941, "learning_rate": 9.99329670832114e-06, "loss": 18.2319, "step": 2517 }, { "epoch": 0.046027016652347964, "grad_norm": 9.232407530293376, "learning_rate": 9.993281376807598e-06, "loss": 18.4699, "step": 2518 }, { "epoch": 0.046045295848794486, "grad_norm": 7.8520732123053225, "learning_rate": 9.99326602779304e-06, "loss": 18.191, "step": 2519 }, { "epoch": 0.04606357504524101, "grad_norm": 7.16087588755416, "learning_rate": 9.993250661277519e-06, "loss": 17.6772, "step": 2520 }, { "epoch": 0.046081854241687537, "grad_norm": 8.187990817383524, "learning_rate": 9.993235277261084e-06, "loss": 18.0416, "step": 2521 }, { "epoch": 0.04610013343813406, "grad_norm": 7.291193162948097, "learning_rate": 9.993219875743796e-06, "loss": 17.7921, "step": 2522 }, { "epoch": 0.04611841263458059, "grad_norm": 7.269722958984533, "learning_rate": 9.993204456725703e-06, "loss": 17.7402, "step": 2523 }, { "epoch": 0.04613669183102711, "grad_norm": 9.39702523786115, "learning_rate": 9.993189020206863e-06, "loss": 18.7017, "step": 2524 }, { "epoch": 0.04615497102747363, "grad_norm": 8.422467668553633, "learning_rate": 9.99317356618733e-06, "loss": 18.2036, "step": 2525 }, { "epoch": 0.04617325022392016, "grad_norm": 7.692280567456548, "learning_rate": 9.993158094667155e-06, "loss": 17.9819, "step": 2526 }, { "epoch": 0.04619152942036668, "grad_norm": 8.8049564707133, "learning_rate": 9.993142605646395e-06, "loss": 18.1562, "step": 2527 }, { "epoch": 0.0462098086168132, "grad_norm": 7.517125465847618, "learning_rate": 9.993127099125104e-06, "loss": 17.8768, "step": 2528 }, { "epoch": 0.04622808781325973, "grad_norm": 7.10833132981136, "learning_rate": 9.993111575103334e-06, "loss": 17.7035, "step": 2529 }, { "epoch": 0.04624636700970625, "grad_norm": 8.616302291529108, "learning_rate": 9.993096033581143e-06, "loss": 18.3784, "step": 2530 }, { "epoch": 0.046264646206152775, "grad_norm": 8.768140196492558, "learning_rate": 9.993080474558584e-06, "loss": 18.3982, "step": 2531 }, { "epoch": 0.046282925402599304, "grad_norm": 8.069596110815922, "learning_rate": 9.993064898035709e-06, "loss": 18.495, "step": 2532 }, { "epoch": 0.046301204599045825, "grad_norm": 6.436102127592379, "learning_rate": 9.993049304012576e-06, "loss": 17.221, "step": 2533 }, { "epoch": 0.04631948379549235, "grad_norm": 9.965611577331051, "learning_rate": 9.993033692489239e-06, "loss": 19.1679, "step": 2534 }, { "epoch": 0.046337762991938876, "grad_norm": 8.699977371157395, "learning_rate": 9.993018063465751e-06, "loss": 18.0354, "step": 2535 }, { "epoch": 0.0463560421883854, "grad_norm": 7.766489544229284, "learning_rate": 9.993002416942171e-06, "loss": 18.1615, "step": 2536 }, { "epoch": 0.04637432138483192, "grad_norm": 7.966456947663706, "learning_rate": 9.992986752918548e-06, "loss": 18.0083, "step": 2537 }, { "epoch": 0.04639260058127845, "grad_norm": 7.8614001915835265, "learning_rate": 9.99297107139494e-06, "loss": 18.0716, "step": 2538 }, { "epoch": 0.04641087977772497, "grad_norm": 7.738265008664061, "learning_rate": 9.9929553723714e-06, "loss": 17.7186, "step": 2539 }, { "epoch": 0.0464291589741715, "grad_norm": 7.755710041382345, "learning_rate": 9.992939655847988e-06, "loss": 17.9862, "step": 2540 }, { "epoch": 0.04644743817061802, "grad_norm": 7.007740920005631, "learning_rate": 9.992923921824754e-06, "loss": 17.6481, "step": 2541 }, { "epoch": 0.04646571736706454, "grad_norm": 7.8676222835982506, "learning_rate": 9.992908170301755e-06, "loss": 17.9741, "step": 2542 }, { "epoch": 0.04648399656351107, "grad_norm": 8.11514801090296, "learning_rate": 9.992892401279047e-06, "loss": 18.0688, "step": 2543 }, { "epoch": 0.04650227575995759, "grad_norm": 7.784713886404579, "learning_rate": 9.992876614756684e-06, "loss": 18.4181, "step": 2544 }, { "epoch": 0.046520554956404114, "grad_norm": 7.55659106472757, "learning_rate": 9.992860810734722e-06, "loss": 17.849, "step": 2545 }, { "epoch": 0.04653883415285064, "grad_norm": 8.141532762416936, "learning_rate": 9.992844989213213e-06, "loss": 18.0762, "step": 2546 }, { "epoch": 0.046557113349297165, "grad_norm": 7.9981572386877495, "learning_rate": 9.99282915019222e-06, "loss": 18.0064, "step": 2547 }, { "epoch": 0.04657539254574369, "grad_norm": 5.807312095679033, "learning_rate": 9.992813293671792e-06, "loss": 17.2469, "step": 2548 }, { "epoch": 0.046593671742190215, "grad_norm": 8.14120649526761, "learning_rate": 9.992797419651988e-06, "loss": 18.2199, "step": 2549 }, { "epoch": 0.04661195093863674, "grad_norm": 7.693158337588116, "learning_rate": 9.99278152813286e-06, "loss": 17.988, "step": 2550 }, { "epoch": 0.04663023013508326, "grad_norm": 8.492286261870182, "learning_rate": 9.992765619114467e-06, "loss": 18.3276, "step": 2551 }, { "epoch": 0.04664850933152979, "grad_norm": 7.146893703259297, "learning_rate": 9.992749692596863e-06, "loss": 17.8245, "step": 2552 }, { "epoch": 0.04666678852797631, "grad_norm": 7.384159350830155, "learning_rate": 9.992733748580108e-06, "loss": 17.9852, "step": 2553 }, { "epoch": 0.04668506772442283, "grad_norm": 8.624144394164366, "learning_rate": 9.99271778706425e-06, "loss": 17.6288, "step": 2554 }, { "epoch": 0.04670334692086936, "grad_norm": 7.930266695448252, "learning_rate": 9.992701808049351e-06, "loss": 18.2687, "step": 2555 }, { "epoch": 0.04672162611731588, "grad_norm": 7.435136965216283, "learning_rate": 9.992685811535467e-06, "loss": 17.8033, "step": 2556 }, { "epoch": 0.04673990531376241, "grad_norm": 7.599543264066056, "learning_rate": 9.992669797522652e-06, "loss": 17.7481, "step": 2557 }, { "epoch": 0.04675818451020893, "grad_norm": 8.429798985553415, "learning_rate": 9.992653766010961e-06, "loss": 18.4653, "step": 2558 }, { "epoch": 0.046776463706655454, "grad_norm": 8.076173987447698, "learning_rate": 9.992637717000453e-06, "loss": 18.2065, "step": 2559 }, { "epoch": 0.04679474290310198, "grad_norm": 8.781536678777618, "learning_rate": 9.992621650491184e-06, "loss": 18.5883, "step": 2560 }, { "epoch": 0.046813022099548504, "grad_norm": 7.444958861920365, "learning_rate": 9.992605566483206e-06, "loss": 17.9761, "step": 2561 }, { "epoch": 0.046831301295995026, "grad_norm": 8.423244032545847, "learning_rate": 9.992589464976581e-06, "loss": 18.1584, "step": 2562 }, { "epoch": 0.046849580492441555, "grad_norm": 7.3249632914321054, "learning_rate": 9.992573345971364e-06, "loss": 18.007, "step": 2563 }, { "epoch": 0.046867859688888076, "grad_norm": 8.690883457836394, "learning_rate": 9.99255720946761e-06, "loss": 18.4969, "step": 2564 }, { "epoch": 0.0468861388853346, "grad_norm": 8.766408683723455, "learning_rate": 9.992541055465374e-06, "loss": 18.6329, "step": 2565 }, { "epoch": 0.04690441808178113, "grad_norm": 7.732559015449722, "learning_rate": 9.992524883964717e-06, "loss": 18.4675, "step": 2566 }, { "epoch": 0.04692269727822765, "grad_norm": 11.206808671649624, "learning_rate": 9.992508694965695e-06, "loss": 19.42, "step": 2567 }, { "epoch": 0.04694097647467417, "grad_norm": 6.774508301080423, "learning_rate": 9.99249248846836e-06, "loss": 17.5349, "step": 2568 }, { "epoch": 0.0469592556711207, "grad_norm": 7.27514549721159, "learning_rate": 9.992476264472774e-06, "loss": 17.8747, "step": 2569 }, { "epoch": 0.04697753486756722, "grad_norm": 7.423724748051973, "learning_rate": 9.992460022978991e-06, "loss": 17.8545, "step": 2570 }, { "epoch": 0.04699581406401374, "grad_norm": 8.096889005274987, "learning_rate": 9.99244376398707e-06, "loss": 18.4889, "step": 2571 }, { "epoch": 0.04701409326046027, "grad_norm": 7.674387346092054, "learning_rate": 9.992427487497065e-06, "loss": 18.1159, "step": 2572 }, { "epoch": 0.04703237245690679, "grad_norm": 7.788396641401589, "learning_rate": 9.992411193509037e-06, "loss": 17.8568, "step": 2573 }, { "epoch": 0.04705065165335332, "grad_norm": 7.8762417225446395, "learning_rate": 9.99239488202304e-06, "loss": 18.0088, "step": 2574 }, { "epoch": 0.047068930849799843, "grad_norm": 7.238786197429148, "learning_rate": 9.992378553039132e-06, "loss": 17.8319, "step": 2575 }, { "epoch": 0.047087210046246365, "grad_norm": 7.068213361844402, "learning_rate": 9.99236220655737e-06, "loss": 17.73, "step": 2576 }, { "epoch": 0.047105489242692894, "grad_norm": 8.780673471941459, "learning_rate": 9.992345842577814e-06, "loss": 18.3554, "step": 2577 }, { "epoch": 0.047123768439139416, "grad_norm": 8.159953183875249, "learning_rate": 9.992329461100517e-06, "loss": 17.8448, "step": 2578 }, { "epoch": 0.04714204763558594, "grad_norm": 7.723023562155313, "learning_rate": 9.992313062125538e-06, "loss": 17.7103, "step": 2579 }, { "epoch": 0.047160326832032466, "grad_norm": 7.616277240305632, "learning_rate": 9.992296645652934e-06, "loss": 17.9065, "step": 2580 }, { "epoch": 0.04717860602847899, "grad_norm": 7.571498465607509, "learning_rate": 9.992280211682766e-06, "loss": 17.9898, "step": 2581 }, { "epoch": 0.04719688522492551, "grad_norm": 9.024191217366022, "learning_rate": 9.992263760215089e-06, "loss": 18.402, "step": 2582 }, { "epoch": 0.04721516442137204, "grad_norm": 7.7871098173690125, "learning_rate": 9.992247291249959e-06, "loss": 18.2878, "step": 2583 }, { "epoch": 0.04723344361781856, "grad_norm": 8.830236557374969, "learning_rate": 9.992230804787438e-06, "loss": 17.8729, "step": 2584 }, { "epoch": 0.04725172281426508, "grad_norm": 7.122543761547274, "learning_rate": 9.992214300827579e-06, "loss": 17.8097, "step": 2585 }, { "epoch": 0.04727000201071161, "grad_norm": 7.615968526497754, "learning_rate": 9.992197779370443e-06, "loss": 17.6749, "step": 2586 }, { "epoch": 0.04728828120715813, "grad_norm": 6.58962357922287, "learning_rate": 9.992181240416086e-06, "loss": 17.2602, "step": 2587 }, { "epoch": 0.047306560403604654, "grad_norm": 7.1704096445101335, "learning_rate": 9.992164683964567e-06, "loss": 17.594, "step": 2588 }, { "epoch": 0.04732483960005118, "grad_norm": 7.340001869666006, "learning_rate": 9.992148110015944e-06, "loss": 17.9317, "step": 2589 }, { "epoch": 0.047343118796497705, "grad_norm": 7.094104168318126, "learning_rate": 9.992131518570276e-06, "loss": 17.5242, "step": 2590 }, { "epoch": 0.04736139799294423, "grad_norm": 8.661227238066143, "learning_rate": 9.99211490962762e-06, "loss": 18.1788, "step": 2591 }, { "epoch": 0.047379677189390755, "grad_norm": 9.758540631505968, "learning_rate": 9.992098283188034e-06, "loss": 18.3244, "step": 2592 }, { "epoch": 0.04739795638583728, "grad_norm": 7.213278668106901, "learning_rate": 9.992081639251578e-06, "loss": 17.8225, "step": 2593 }, { "epoch": 0.047416235582283806, "grad_norm": 7.203090426209105, "learning_rate": 9.992064977818307e-06, "loss": 17.7184, "step": 2594 }, { "epoch": 0.04743451477873033, "grad_norm": 7.4925183784994065, "learning_rate": 9.992048298888284e-06, "loss": 17.9008, "step": 2595 }, { "epoch": 0.04745279397517685, "grad_norm": 7.907230747099184, "learning_rate": 9.992031602461562e-06, "loss": 18.0021, "step": 2596 }, { "epoch": 0.04747107317162338, "grad_norm": 7.467349716623535, "learning_rate": 9.992014888538204e-06, "loss": 17.9413, "step": 2597 }, { "epoch": 0.0474893523680699, "grad_norm": 7.353131603693356, "learning_rate": 9.991998157118267e-06, "loss": 17.9257, "step": 2598 }, { "epoch": 0.04750763156451642, "grad_norm": 8.172457199217725, "learning_rate": 9.99198140820181e-06, "loss": 18.531, "step": 2599 }, { "epoch": 0.04752591076096295, "grad_norm": 7.448640737510543, "learning_rate": 9.991964641788892e-06, "loss": 17.7291, "step": 2600 }, { "epoch": 0.04754418995740947, "grad_norm": 7.752089224294541, "learning_rate": 9.991947857879569e-06, "loss": 18.0091, "step": 2601 }, { "epoch": 0.047562469153855993, "grad_norm": 8.286655052760677, "learning_rate": 9.991931056473902e-06, "loss": 18.1219, "step": 2602 }, { "epoch": 0.04758074835030252, "grad_norm": 7.051573843177877, "learning_rate": 9.991914237571952e-06, "loss": 17.877, "step": 2603 }, { "epoch": 0.047599027546749044, "grad_norm": 7.29651890949779, "learning_rate": 9.991897401173774e-06, "loss": 17.7525, "step": 2604 }, { "epoch": 0.047617306743195566, "grad_norm": 6.765985640866973, "learning_rate": 9.99188054727943e-06, "loss": 17.6648, "step": 2605 }, { "epoch": 0.047635585939642094, "grad_norm": 8.575109016142505, "learning_rate": 9.991863675888977e-06, "loss": 18.3309, "step": 2606 }, { "epoch": 0.047653865136088616, "grad_norm": 8.388452131734812, "learning_rate": 9.991846787002474e-06, "loss": 18.1312, "step": 2607 }, { "epoch": 0.047672144332535145, "grad_norm": 8.464403390656695, "learning_rate": 9.991829880619983e-06, "loss": 18.205, "step": 2608 }, { "epoch": 0.04769042352898167, "grad_norm": 7.73207336863152, "learning_rate": 9.99181295674156e-06, "loss": 18.0825, "step": 2609 }, { "epoch": 0.04770870272542819, "grad_norm": 7.234080259893069, "learning_rate": 9.991796015367267e-06, "loss": 17.675, "step": 2610 }, { "epoch": 0.04772698192187472, "grad_norm": 8.487742452379393, "learning_rate": 9.991779056497161e-06, "loss": 18.1511, "step": 2611 }, { "epoch": 0.04774526111832124, "grad_norm": 9.64341880092905, "learning_rate": 9.991762080131305e-06, "loss": 18.5887, "step": 2612 }, { "epoch": 0.04776354031476776, "grad_norm": 6.820414549309318, "learning_rate": 9.991745086269753e-06, "loss": 17.8486, "step": 2613 }, { "epoch": 0.04778181951121429, "grad_norm": 7.205085987664576, "learning_rate": 9.99172807491257e-06, "loss": 18.1047, "step": 2614 }, { "epoch": 0.04780009870766081, "grad_norm": 8.163994358227953, "learning_rate": 9.991711046059812e-06, "loss": 18.2374, "step": 2615 }, { "epoch": 0.04781837790410733, "grad_norm": 8.120598451868254, "learning_rate": 9.991693999711541e-06, "loss": 17.9739, "step": 2616 }, { "epoch": 0.04783665710055386, "grad_norm": 8.265683503522936, "learning_rate": 9.991676935867813e-06, "loss": 18.0476, "step": 2617 }, { "epoch": 0.04785493629700038, "grad_norm": 8.903684856768098, "learning_rate": 9.991659854528693e-06, "loss": 18.1277, "step": 2618 }, { "epoch": 0.047873215493446905, "grad_norm": 7.700767221730183, "learning_rate": 9.991642755694238e-06, "loss": 17.9356, "step": 2619 }, { "epoch": 0.047891494689893434, "grad_norm": 8.101046234456692, "learning_rate": 9.991625639364508e-06, "loss": 18.0207, "step": 2620 }, { "epoch": 0.047909773886339956, "grad_norm": 7.70355085855746, "learning_rate": 9.991608505539563e-06, "loss": 18.1949, "step": 2621 }, { "epoch": 0.04792805308278648, "grad_norm": 7.208674369499795, "learning_rate": 9.991591354219463e-06, "loss": 17.9747, "step": 2622 }, { "epoch": 0.047946332279233006, "grad_norm": 7.9057625499185615, "learning_rate": 9.991574185404269e-06, "loss": 18.1491, "step": 2623 }, { "epoch": 0.04796461147567953, "grad_norm": 8.137422512066872, "learning_rate": 9.99155699909404e-06, "loss": 17.7673, "step": 2624 }, { "epoch": 0.047982890672126056, "grad_norm": 6.801004677122794, "learning_rate": 9.991539795288839e-06, "loss": 17.7284, "step": 2625 }, { "epoch": 0.04800116986857258, "grad_norm": 7.036059525082331, "learning_rate": 9.991522573988723e-06, "loss": 17.792, "step": 2626 }, { "epoch": 0.0480194490650191, "grad_norm": 6.357666108609581, "learning_rate": 9.991505335193752e-06, "loss": 17.4053, "step": 2627 }, { "epoch": 0.04803772826146563, "grad_norm": 6.981203675681132, "learning_rate": 9.99148807890399e-06, "loss": 17.5025, "step": 2628 }, { "epoch": 0.04805600745791215, "grad_norm": 7.709372655406259, "learning_rate": 9.991470805119494e-06, "loss": 18.0022, "step": 2629 }, { "epoch": 0.04807428665435867, "grad_norm": 8.657897652446362, "learning_rate": 9.991453513840327e-06, "loss": 18.302, "step": 2630 }, { "epoch": 0.0480925658508052, "grad_norm": 8.339666057920908, "learning_rate": 9.991436205066549e-06, "loss": 18.4404, "step": 2631 }, { "epoch": 0.04811084504725172, "grad_norm": 8.137596756990114, "learning_rate": 9.991418878798219e-06, "loss": 18.1143, "step": 2632 }, { "epoch": 0.048129124243698244, "grad_norm": 7.829696040410456, "learning_rate": 9.991401535035398e-06, "loss": 17.6556, "step": 2633 }, { "epoch": 0.04814740344014477, "grad_norm": 7.93700230501803, "learning_rate": 9.99138417377815e-06, "loss": 17.9485, "step": 2634 }, { "epoch": 0.048165682636591295, "grad_norm": 7.596326532724841, "learning_rate": 9.991366795026533e-06, "loss": 17.8489, "step": 2635 }, { "epoch": 0.04818396183303782, "grad_norm": 7.7847163418671865, "learning_rate": 9.991349398780607e-06, "loss": 18.0203, "step": 2636 }, { "epoch": 0.048202241029484345, "grad_norm": 8.30779716323521, "learning_rate": 9.991331985040437e-06, "loss": 18.1566, "step": 2637 }, { "epoch": 0.04822052022593087, "grad_norm": 7.767510160412045, "learning_rate": 9.991314553806079e-06, "loss": 17.8402, "step": 2638 }, { "epoch": 0.04823879942237739, "grad_norm": 24.00078297240223, "learning_rate": 9.9912971050776e-06, "loss": 19.0444, "step": 2639 }, { "epoch": 0.04825707861882392, "grad_norm": 8.314372895709644, "learning_rate": 9.991279638855056e-06, "loss": 18.3847, "step": 2640 }, { "epoch": 0.04827535781527044, "grad_norm": 6.9919850584153265, "learning_rate": 9.991262155138507e-06, "loss": 18.0172, "step": 2641 }, { "epoch": 0.04829363701171697, "grad_norm": 7.086213799597908, "learning_rate": 9.991244653928021e-06, "loss": 17.582, "step": 2642 }, { "epoch": 0.04831191620816349, "grad_norm": 8.058177867690771, "learning_rate": 9.991227135223656e-06, "loss": 18.4592, "step": 2643 }, { "epoch": 0.04833019540461001, "grad_norm": 7.769721264013488, "learning_rate": 9.991209599025471e-06, "loss": 18.0455, "step": 2644 }, { "epoch": 0.04834847460105654, "grad_norm": 7.232355571336095, "learning_rate": 9.991192045333529e-06, "loss": 17.6612, "step": 2645 }, { "epoch": 0.04836675379750306, "grad_norm": 8.41234655633902, "learning_rate": 9.991174474147892e-06, "loss": 18.1701, "step": 2646 }, { "epoch": 0.048385032993949584, "grad_norm": 8.208951686838457, "learning_rate": 9.991156885468626e-06, "loss": 18.4221, "step": 2647 }, { "epoch": 0.04840331219039611, "grad_norm": 6.2405343761220315, "learning_rate": 9.991139279295784e-06, "loss": 17.4688, "step": 2648 }, { "epoch": 0.048421591386842634, "grad_norm": 8.138753409541787, "learning_rate": 9.991121655629433e-06, "loss": 18.0926, "step": 2649 }, { "epoch": 0.048439870583289156, "grad_norm": 7.509042346002413, "learning_rate": 9.991104014469633e-06, "loss": 18.0035, "step": 2650 }, { "epoch": 0.048458149779735685, "grad_norm": 9.058818337912584, "learning_rate": 9.991086355816445e-06, "loss": 18.5006, "step": 2651 }, { "epoch": 0.048476428976182206, "grad_norm": 7.074231101969972, "learning_rate": 9.991068679669936e-06, "loss": 17.4708, "step": 2652 }, { "epoch": 0.04849470817262873, "grad_norm": 8.563493350493948, "learning_rate": 9.991050986030163e-06, "loss": 18.3651, "step": 2653 }, { "epoch": 0.04851298736907526, "grad_norm": 7.195424460601591, "learning_rate": 9.991033274897189e-06, "loss": 17.8509, "step": 2654 }, { "epoch": 0.04853126656552178, "grad_norm": 7.36165428435057, "learning_rate": 9.991015546271076e-06, "loss": 17.838, "step": 2655 }, { "epoch": 0.0485495457619683, "grad_norm": 7.307704359588099, "learning_rate": 9.990997800151888e-06, "loss": 17.606, "step": 2656 }, { "epoch": 0.04856782495841483, "grad_norm": 7.144856676722134, "learning_rate": 9.990980036539683e-06, "loss": 17.7495, "step": 2657 }, { "epoch": 0.04858610415486135, "grad_norm": 8.362609901603209, "learning_rate": 9.99096225543453e-06, "loss": 18.3898, "step": 2658 }, { "epoch": 0.04860438335130788, "grad_norm": 7.095504670362594, "learning_rate": 9.990944456836485e-06, "loss": 17.6543, "step": 2659 }, { "epoch": 0.0486226625477544, "grad_norm": 7.662936816391292, "learning_rate": 9.990926640745612e-06, "loss": 17.8874, "step": 2660 }, { "epoch": 0.04864094174420092, "grad_norm": 7.528980619883442, "learning_rate": 9.990908807161976e-06, "loss": 17.9651, "step": 2661 }, { "epoch": 0.04865922094064745, "grad_norm": 7.575553019127141, "learning_rate": 9.990890956085638e-06, "loss": 18.044, "step": 2662 }, { "epoch": 0.048677500137093974, "grad_norm": 6.732173943432213, "learning_rate": 9.99087308751666e-06, "loss": 17.5589, "step": 2663 }, { "epoch": 0.048695779333540495, "grad_norm": 7.923031811798792, "learning_rate": 9.990855201455104e-06, "loss": 18.3133, "step": 2664 }, { "epoch": 0.048714058529987024, "grad_norm": 8.057810619771036, "learning_rate": 9.990837297901034e-06, "loss": 18.3014, "step": 2665 }, { "epoch": 0.048732337726433546, "grad_norm": 8.077171777686797, "learning_rate": 9.990819376854512e-06, "loss": 18.3852, "step": 2666 }, { "epoch": 0.04875061692288007, "grad_norm": 6.893345504430657, "learning_rate": 9.990801438315603e-06, "loss": 17.6469, "step": 2667 }, { "epoch": 0.048768896119326596, "grad_norm": 7.148773305250935, "learning_rate": 9.990783482284366e-06, "loss": 17.9787, "step": 2668 }, { "epoch": 0.04878717531577312, "grad_norm": 7.814760911182298, "learning_rate": 9.990765508760867e-06, "loss": 17.8342, "step": 2669 }, { "epoch": 0.04880545451221964, "grad_norm": 7.085645296399639, "learning_rate": 9.990747517745168e-06, "loss": 17.7406, "step": 2670 }, { "epoch": 0.04882373370866617, "grad_norm": 7.047769382770596, "learning_rate": 9.99072950923733e-06, "loss": 17.857, "step": 2671 }, { "epoch": 0.04884201290511269, "grad_norm": 8.144642378718615, "learning_rate": 9.99071148323742e-06, "loss": 18.29, "step": 2672 }, { "epoch": 0.04886029210155921, "grad_norm": 9.409388533866025, "learning_rate": 9.9906934397455e-06, "loss": 18.1425, "step": 2673 }, { "epoch": 0.04887857129800574, "grad_norm": 7.787783845583783, "learning_rate": 9.99067537876163e-06, "loss": 17.5491, "step": 2674 }, { "epoch": 0.04889685049445226, "grad_norm": 7.101698957878979, "learning_rate": 9.990657300285878e-06, "loss": 17.5371, "step": 2675 }, { "epoch": 0.04891512969089879, "grad_norm": 7.236285902937417, "learning_rate": 9.990639204318305e-06, "loss": 17.491, "step": 2676 }, { "epoch": 0.04893340888734531, "grad_norm": 6.455738269410737, "learning_rate": 9.990621090858974e-06, "loss": 17.3108, "step": 2677 }, { "epoch": 0.048951688083791835, "grad_norm": 7.938276799312635, "learning_rate": 9.99060295990795e-06, "loss": 18.1582, "step": 2678 }, { "epoch": 0.04896996728023836, "grad_norm": 7.514158271321571, "learning_rate": 9.990584811465295e-06, "loss": 17.871, "step": 2679 }, { "epoch": 0.048988246476684885, "grad_norm": 7.4360056843811755, "learning_rate": 9.990566645531074e-06, "loss": 17.8645, "step": 2680 }, { "epoch": 0.04900652567313141, "grad_norm": 7.44016188929528, "learning_rate": 9.99054846210535e-06, "loss": 17.5871, "step": 2681 }, { "epoch": 0.049024804869577936, "grad_norm": 7.422628097035798, "learning_rate": 9.990530261188185e-06, "loss": 17.8896, "step": 2682 }, { "epoch": 0.04904308406602446, "grad_norm": 8.248708541352205, "learning_rate": 9.990512042779646e-06, "loss": 18.092, "step": 2683 }, { "epoch": 0.04906136326247098, "grad_norm": 9.317770876223529, "learning_rate": 9.990493806879795e-06, "loss": 18.4137, "step": 2684 }, { "epoch": 0.04907964245891751, "grad_norm": 8.191308927578604, "learning_rate": 9.990475553488697e-06, "loss": 18.0061, "step": 2685 }, { "epoch": 0.04909792165536403, "grad_norm": 8.381473981870313, "learning_rate": 9.990457282606413e-06, "loss": 18.1359, "step": 2686 }, { "epoch": 0.04911620085181055, "grad_norm": 8.144436950856923, "learning_rate": 9.990438994233012e-06, "loss": 17.9591, "step": 2687 }, { "epoch": 0.04913448004825708, "grad_norm": 9.027632766190614, "learning_rate": 9.990420688368553e-06, "loss": 18.4355, "step": 2688 }, { "epoch": 0.0491527592447036, "grad_norm": 8.154001017546962, "learning_rate": 9.990402365013106e-06, "loss": 18.1974, "step": 2689 }, { "epoch": 0.049171038441150124, "grad_norm": 7.254699510203792, "learning_rate": 9.990384024166729e-06, "loss": 17.431, "step": 2690 }, { "epoch": 0.04918931763759665, "grad_norm": 6.519549556102076, "learning_rate": 9.990365665829489e-06, "loss": 17.2872, "step": 2691 }, { "epoch": 0.049207596834043174, "grad_norm": 8.202965101946765, "learning_rate": 9.99034729000145e-06, "loss": 18.3197, "step": 2692 }, { "epoch": 0.0492258760304897, "grad_norm": 6.895803622581931, "learning_rate": 9.990328896682678e-06, "loss": 17.5714, "step": 2693 }, { "epoch": 0.049244155226936225, "grad_norm": 6.846212451053361, "learning_rate": 9.990310485873236e-06, "loss": 17.408, "step": 2694 }, { "epoch": 0.049262434423382746, "grad_norm": 8.374596122436799, "learning_rate": 9.990292057573188e-06, "loss": 18.2443, "step": 2695 }, { "epoch": 0.049280713619829275, "grad_norm": 8.820118880435693, "learning_rate": 9.990273611782602e-06, "loss": 18.427, "step": 2696 }, { "epoch": 0.0492989928162758, "grad_norm": 7.0649054904768995, "learning_rate": 9.990255148501537e-06, "loss": 17.8637, "step": 2697 }, { "epoch": 0.04931727201272232, "grad_norm": 7.902482265181302, "learning_rate": 9.990236667730062e-06, "loss": 18.1622, "step": 2698 }, { "epoch": 0.04933555120916885, "grad_norm": 7.266759864899283, "learning_rate": 9.99021816946824e-06, "loss": 17.6958, "step": 2699 }, { "epoch": 0.04935383040561537, "grad_norm": 8.263652197088238, "learning_rate": 9.990199653716138e-06, "loss": 18.38, "step": 2700 }, { "epoch": 0.04937210960206189, "grad_norm": 8.477531153024145, "learning_rate": 9.990181120473816e-06, "loss": 18.3485, "step": 2701 }, { "epoch": 0.04939038879850842, "grad_norm": 7.514234356040806, "learning_rate": 9.990162569741346e-06, "loss": 17.9061, "step": 2702 }, { "epoch": 0.04940866799495494, "grad_norm": 8.407196745090438, "learning_rate": 9.990144001518787e-06, "loss": 18.3472, "step": 2703 }, { "epoch": 0.04942694719140146, "grad_norm": 7.354320770017759, "learning_rate": 9.990125415806207e-06, "loss": 17.6776, "step": 2704 }, { "epoch": 0.04944522638784799, "grad_norm": 7.396485600416532, "learning_rate": 9.990106812603671e-06, "loss": 17.9695, "step": 2705 }, { "epoch": 0.04946350558429451, "grad_norm": 7.814055803305334, "learning_rate": 9.990088191911242e-06, "loss": 18.1308, "step": 2706 }, { "epoch": 0.049481784780741035, "grad_norm": 7.437319022808082, "learning_rate": 9.990069553728986e-06, "loss": 17.9965, "step": 2707 }, { "epoch": 0.049500063977187564, "grad_norm": 7.669524625555445, "learning_rate": 9.990050898056973e-06, "loss": 18.1744, "step": 2708 }, { "epoch": 0.049518343173634086, "grad_norm": 7.536290606373893, "learning_rate": 9.99003222489526e-06, "loss": 18.0637, "step": 2709 }, { "epoch": 0.049536622370080614, "grad_norm": 8.743718557438587, "learning_rate": 9.99001353424392e-06, "loss": 18.8466, "step": 2710 }, { "epoch": 0.049554901566527136, "grad_norm": 8.740644797272857, "learning_rate": 9.989994826103017e-06, "loss": 18.5599, "step": 2711 }, { "epoch": 0.04957318076297366, "grad_norm": 6.463562885126097, "learning_rate": 9.989976100472612e-06, "loss": 17.3284, "step": 2712 }, { "epoch": 0.04959145995942019, "grad_norm": 7.729746826931038, "learning_rate": 9.989957357352775e-06, "loss": 18.1162, "step": 2713 }, { "epoch": 0.04960973915586671, "grad_norm": 8.397131288738244, "learning_rate": 9.989938596743571e-06, "loss": 18.1535, "step": 2714 }, { "epoch": 0.04962801835231323, "grad_norm": 7.299517582885823, "learning_rate": 9.989919818645066e-06, "loss": 17.8696, "step": 2715 }, { "epoch": 0.04964629754875976, "grad_norm": 6.118793825633256, "learning_rate": 9.989901023057323e-06, "loss": 17.3392, "step": 2716 }, { "epoch": 0.04966457674520628, "grad_norm": 8.087623894782654, "learning_rate": 9.989882209980411e-06, "loss": 18.0371, "step": 2717 }, { "epoch": 0.0496828559416528, "grad_norm": 7.182842784130826, "learning_rate": 9.989863379414395e-06, "loss": 17.953, "step": 2718 }, { "epoch": 0.04970113513809933, "grad_norm": 7.789297634414667, "learning_rate": 9.98984453135934e-06, "loss": 18.2924, "step": 2719 }, { "epoch": 0.04971941433454585, "grad_norm": 7.484815689910988, "learning_rate": 9.989825665815314e-06, "loss": 17.7452, "step": 2720 }, { "epoch": 0.049737693530992375, "grad_norm": 8.464123159713012, "learning_rate": 9.989806782782381e-06, "loss": 18.1465, "step": 2721 }, { "epoch": 0.0497559727274389, "grad_norm": 7.29323077246332, "learning_rate": 9.989787882260609e-06, "loss": 17.7274, "step": 2722 }, { "epoch": 0.049774251923885425, "grad_norm": 7.370170116436082, "learning_rate": 9.989768964250062e-06, "loss": 17.826, "step": 2723 }, { "epoch": 0.04979253112033195, "grad_norm": 8.555492267977986, "learning_rate": 9.98975002875081e-06, "loss": 18.3993, "step": 2724 }, { "epoch": 0.049810810316778475, "grad_norm": 7.738168820238301, "learning_rate": 9.989731075762916e-06, "loss": 18.149, "step": 2725 }, { "epoch": 0.049829089513225, "grad_norm": 6.684681111753755, "learning_rate": 9.989712105286446e-06, "loss": 17.6446, "step": 2726 }, { "epoch": 0.049847368709671526, "grad_norm": 7.423002418597444, "learning_rate": 9.98969311732147e-06, "loss": 17.9281, "step": 2727 }, { "epoch": 0.04986564790611805, "grad_norm": 7.046937825717382, "learning_rate": 9.989674111868052e-06, "loss": 17.6348, "step": 2728 }, { "epoch": 0.04988392710256457, "grad_norm": 7.3771534692869, "learning_rate": 9.98965508892626e-06, "loss": 17.5365, "step": 2729 }, { "epoch": 0.0499022062990111, "grad_norm": 7.3746896958532995, "learning_rate": 9.98963604849616e-06, "loss": 17.8783, "step": 2730 }, { "epoch": 0.04992048549545762, "grad_norm": 7.743231553670266, "learning_rate": 9.989616990577817e-06, "loss": 17.9597, "step": 2731 }, { "epoch": 0.04993876469190414, "grad_norm": 9.20320942441712, "learning_rate": 9.9895979151713e-06, "loss": 18.9336, "step": 2732 }, { "epoch": 0.04995704388835067, "grad_norm": 7.675649772850519, "learning_rate": 9.989578822276678e-06, "loss": 18.2667, "step": 2733 }, { "epoch": 0.04997532308479719, "grad_norm": 7.708890407167971, "learning_rate": 9.989559711894012e-06, "loss": 18.1385, "step": 2734 }, { "epoch": 0.049993602281243714, "grad_norm": 6.9503072081652215, "learning_rate": 9.989540584023374e-06, "loss": 17.496, "step": 2735 }, { "epoch": 0.05001188147769024, "grad_norm": 7.330806638692699, "learning_rate": 9.989521438664828e-06, "loss": 17.7176, "step": 2736 }, { "epoch": 0.050030160674136764, "grad_norm": 9.208583128486223, "learning_rate": 9.989502275818443e-06, "loss": 18.4525, "step": 2737 }, { "epoch": 0.050048439870583286, "grad_norm": 6.781366804392853, "learning_rate": 9.989483095484284e-06, "loss": 17.7662, "step": 2738 }, { "epoch": 0.050066719067029815, "grad_norm": 7.084519030626298, "learning_rate": 9.989463897662421e-06, "loss": 17.546, "step": 2739 }, { "epoch": 0.05008499826347634, "grad_norm": 6.586986390290971, "learning_rate": 9.98944468235292e-06, "loss": 17.7685, "step": 2740 }, { "epoch": 0.05010327745992286, "grad_norm": 9.214525055319351, "learning_rate": 9.989425449555848e-06, "loss": 18.6747, "step": 2741 }, { "epoch": 0.05012155665636939, "grad_norm": 8.345511864208909, "learning_rate": 9.989406199271273e-06, "loss": 18.0107, "step": 2742 }, { "epoch": 0.05013983585281591, "grad_norm": 6.7572346177421965, "learning_rate": 9.989386931499264e-06, "loss": 17.6164, "step": 2743 }, { "epoch": 0.05015811504926244, "grad_norm": 8.56372854986229, "learning_rate": 9.989367646239883e-06, "loss": 18.5043, "step": 2744 }, { "epoch": 0.05017639424570896, "grad_norm": 7.870519450132207, "learning_rate": 9.989348343493204e-06, "loss": 18.1654, "step": 2745 }, { "epoch": 0.05019467344215548, "grad_norm": 11.299407608036292, "learning_rate": 9.989329023259291e-06, "loss": 17.8543, "step": 2746 }, { "epoch": 0.05021295263860201, "grad_norm": 6.967217079294844, "learning_rate": 9.989309685538214e-06, "loss": 17.8087, "step": 2747 }, { "epoch": 0.05023123183504853, "grad_norm": 7.634909711788494, "learning_rate": 9.989290330330038e-06, "loss": 17.8961, "step": 2748 }, { "epoch": 0.05024951103149505, "grad_norm": 8.418568458532993, "learning_rate": 9.989270957634834e-06, "loss": 18.7163, "step": 2749 }, { "epoch": 0.05026779022794158, "grad_norm": 8.256148404351109, "learning_rate": 9.989251567452668e-06, "loss": 18.1897, "step": 2750 }, { "epoch": 0.050286069424388104, "grad_norm": 7.9624461708300975, "learning_rate": 9.989232159783606e-06, "loss": 17.9329, "step": 2751 }, { "epoch": 0.050304348620834625, "grad_norm": 8.20349321365802, "learning_rate": 9.98921273462772e-06, "loss": 17.9774, "step": 2752 }, { "epoch": 0.050322627817281154, "grad_norm": 8.250555558527648, "learning_rate": 9.989193291985076e-06, "loss": 18.4838, "step": 2753 }, { "epoch": 0.050340907013727676, "grad_norm": 7.451051830766465, "learning_rate": 9.98917383185574e-06, "loss": 17.7306, "step": 2754 }, { "epoch": 0.0503591862101742, "grad_norm": 7.542378455227766, "learning_rate": 9.989154354239787e-06, "loss": 18.0032, "step": 2755 }, { "epoch": 0.050377465406620726, "grad_norm": 8.392777781864194, "learning_rate": 9.989134859137279e-06, "loss": 18.193, "step": 2756 }, { "epoch": 0.05039574460306725, "grad_norm": 7.408888359247001, "learning_rate": 9.989115346548285e-06, "loss": 18.0566, "step": 2757 }, { "epoch": 0.05041402379951377, "grad_norm": 7.532589654429319, "learning_rate": 9.989095816472876e-06, "loss": 17.8037, "step": 2758 }, { "epoch": 0.0504323029959603, "grad_norm": 7.664168018715292, "learning_rate": 9.989076268911119e-06, "loss": 18.1644, "step": 2759 }, { "epoch": 0.05045058219240682, "grad_norm": 7.5215914971492115, "learning_rate": 9.989056703863083e-06, "loss": 17.9341, "step": 2760 }, { "epoch": 0.05046886138885335, "grad_norm": 7.206447022471866, "learning_rate": 9.989037121328835e-06, "loss": 17.9137, "step": 2761 }, { "epoch": 0.05048714058529987, "grad_norm": 7.76329719211878, "learning_rate": 9.989017521308447e-06, "loss": 18.292, "step": 2762 }, { "epoch": 0.05050541978174639, "grad_norm": 7.716220081439177, "learning_rate": 9.988997903801984e-06, "loss": 17.5243, "step": 2763 }, { "epoch": 0.05052369897819292, "grad_norm": 8.135237247062651, "learning_rate": 9.988978268809516e-06, "loss": 18.4224, "step": 2764 }, { "epoch": 0.05054197817463944, "grad_norm": 7.340949505261392, "learning_rate": 9.988958616331112e-06, "loss": 17.8489, "step": 2765 }, { "epoch": 0.050560257371085965, "grad_norm": 8.604957659993723, "learning_rate": 9.988938946366841e-06, "loss": 18.1277, "step": 2766 }, { "epoch": 0.050578536567532494, "grad_norm": 6.703904060868726, "learning_rate": 9.988919258916775e-06, "loss": 17.6016, "step": 2767 }, { "epoch": 0.050596815763979015, "grad_norm": 7.615904567530197, "learning_rate": 9.988899553980975e-06, "loss": 18.0835, "step": 2768 }, { "epoch": 0.05061509496042554, "grad_norm": 7.656346752005079, "learning_rate": 9.988879831559516e-06, "loss": 17.724, "step": 2769 }, { "epoch": 0.050633374156872066, "grad_norm": 7.6703645021028946, "learning_rate": 9.988860091652469e-06, "loss": 17.9447, "step": 2770 }, { "epoch": 0.05065165335331859, "grad_norm": 6.133725110298065, "learning_rate": 9.988840334259898e-06, "loss": 17.2218, "step": 2771 }, { "epoch": 0.05066993254976511, "grad_norm": 8.109487016435057, "learning_rate": 9.988820559381876e-06, "loss": 18.2571, "step": 2772 }, { "epoch": 0.05068821174621164, "grad_norm": 6.865556555909455, "learning_rate": 9.98880076701847e-06, "loss": 17.6203, "step": 2773 }, { "epoch": 0.05070649094265816, "grad_norm": 8.299023898912916, "learning_rate": 9.98878095716975e-06, "loss": 17.972, "step": 2774 }, { "epoch": 0.05072477013910468, "grad_norm": 7.050039866717872, "learning_rate": 9.988761129835787e-06, "loss": 17.6643, "step": 2775 }, { "epoch": 0.05074304933555121, "grad_norm": 7.070438246786039, "learning_rate": 9.988741285016648e-06, "loss": 17.748, "step": 2776 }, { "epoch": 0.05076132853199773, "grad_norm": 7.422909549351179, "learning_rate": 9.988721422712401e-06, "loss": 17.976, "step": 2777 }, { "epoch": 0.05077960772844426, "grad_norm": 7.5322755767621326, "learning_rate": 9.988701542923122e-06, "loss": 17.9389, "step": 2778 }, { "epoch": 0.05079788692489078, "grad_norm": 7.267922269520265, "learning_rate": 9.988681645648876e-06, "loss": 17.4306, "step": 2779 }, { "epoch": 0.050816166121337304, "grad_norm": 10.366644639631332, "learning_rate": 9.988661730889734e-06, "loss": 18.8708, "step": 2780 }, { "epoch": 0.05083444531778383, "grad_norm": 8.489072949808808, "learning_rate": 9.988641798645767e-06, "loss": 18.211, "step": 2781 }, { "epoch": 0.050852724514230355, "grad_norm": 7.94867403158134, "learning_rate": 9.98862184891704e-06, "loss": 17.809, "step": 2782 }, { "epoch": 0.050871003710676876, "grad_norm": 8.308052879184427, "learning_rate": 9.988601881703628e-06, "loss": 18.4109, "step": 2783 }, { "epoch": 0.050889282907123405, "grad_norm": 6.382505749570598, "learning_rate": 9.9885818970056e-06, "loss": 17.5987, "step": 2784 }, { "epoch": 0.05090756210356993, "grad_norm": 8.65878624516278, "learning_rate": 9.988561894823025e-06, "loss": 18.5557, "step": 2785 }, { "epoch": 0.05092584130001645, "grad_norm": 7.704704682550004, "learning_rate": 9.988541875155972e-06, "loss": 18.2683, "step": 2786 }, { "epoch": 0.05094412049646298, "grad_norm": 7.87920528871043, "learning_rate": 9.988521838004515e-06, "loss": 17.8086, "step": 2787 }, { "epoch": 0.0509623996929095, "grad_norm": 9.26821564888294, "learning_rate": 9.98850178336872e-06, "loss": 18.5718, "step": 2788 }, { "epoch": 0.05098067888935602, "grad_norm": 6.667709687642425, "learning_rate": 9.988481711248659e-06, "loss": 17.7336, "step": 2789 }, { "epoch": 0.05099895808580255, "grad_norm": 7.861762973631642, "learning_rate": 9.988461621644403e-06, "loss": 17.9559, "step": 2790 }, { "epoch": 0.05101723728224907, "grad_norm": 8.98523827817954, "learning_rate": 9.988441514556022e-06, "loss": 18.5481, "step": 2791 }, { "epoch": 0.05103551647869559, "grad_norm": 7.411424716591436, "learning_rate": 9.988421389983588e-06, "loss": 17.8808, "step": 2792 }, { "epoch": 0.05105379567514212, "grad_norm": 7.7931229957919035, "learning_rate": 9.988401247927167e-06, "loss": 17.8705, "step": 2793 }, { "epoch": 0.051072074871588644, "grad_norm": 8.198926676791432, "learning_rate": 9.988381088386833e-06, "loss": 18.1572, "step": 2794 }, { "epoch": 0.05109035406803517, "grad_norm": 7.361559633425119, "learning_rate": 9.988360911362658e-06, "loss": 17.7629, "step": 2795 }, { "epoch": 0.051108633264481694, "grad_norm": 8.045631817199665, "learning_rate": 9.98834071685471e-06, "loss": 18.3438, "step": 2796 }, { "epoch": 0.051126912460928216, "grad_norm": 6.775119038069602, "learning_rate": 9.98832050486306e-06, "loss": 17.5067, "step": 2797 }, { "epoch": 0.051145191657374744, "grad_norm": 7.169523145096916, "learning_rate": 9.988300275387781e-06, "loss": 17.9069, "step": 2798 }, { "epoch": 0.051163470853821266, "grad_norm": 9.458653998953265, "learning_rate": 9.98828002842894e-06, "loss": 18.2526, "step": 2799 }, { "epoch": 0.05118175005026779, "grad_norm": 7.221610231316801, "learning_rate": 9.988259763986613e-06, "loss": 17.6764, "step": 2800 }, { "epoch": 0.05120002924671432, "grad_norm": 8.594630950349286, "learning_rate": 9.988239482060867e-06, "loss": 18.0721, "step": 2801 }, { "epoch": 0.05121830844316084, "grad_norm": 7.934044726120394, "learning_rate": 9.988219182651775e-06, "loss": 18.1069, "step": 2802 }, { "epoch": 0.05123658763960736, "grad_norm": 9.296702673248296, "learning_rate": 9.988198865759406e-06, "loss": 18.4842, "step": 2803 }, { "epoch": 0.05125486683605389, "grad_norm": 7.743424084410392, "learning_rate": 9.988178531383834e-06, "loss": 18.0088, "step": 2804 }, { "epoch": 0.05127314603250041, "grad_norm": 7.314397756537535, "learning_rate": 9.98815817952513e-06, "loss": 17.5609, "step": 2805 }, { "epoch": 0.05129142522894693, "grad_norm": 8.346376439142999, "learning_rate": 9.988137810183363e-06, "loss": 18.2768, "step": 2806 }, { "epoch": 0.05130970442539346, "grad_norm": 7.676496545059605, "learning_rate": 9.988117423358607e-06, "loss": 18.0011, "step": 2807 }, { "epoch": 0.05132798362183998, "grad_norm": 6.422274098613539, "learning_rate": 9.988097019050932e-06, "loss": 17.4351, "step": 2808 }, { "epoch": 0.051346262818286505, "grad_norm": 8.137567816071021, "learning_rate": 9.98807659726041e-06, "loss": 17.8924, "step": 2809 }, { "epoch": 0.05136454201473303, "grad_norm": 8.880395227381214, "learning_rate": 9.988056157987111e-06, "loss": 18.3481, "step": 2810 }, { "epoch": 0.051382821211179555, "grad_norm": 7.717293646585981, "learning_rate": 9.988035701231108e-06, "loss": 18.0143, "step": 2811 }, { "epoch": 0.051401100407626084, "grad_norm": 7.959960217519171, "learning_rate": 9.988015226992476e-06, "loss": 18.2028, "step": 2812 }, { "epoch": 0.051419379604072606, "grad_norm": 7.950395785233589, "learning_rate": 9.98799473527128e-06, "loss": 18.3118, "step": 2813 }, { "epoch": 0.05143765880051913, "grad_norm": 7.384008026563201, "learning_rate": 9.987974226067597e-06, "loss": 17.6602, "step": 2814 }, { "epoch": 0.051455937996965656, "grad_norm": 9.229153952983667, "learning_rate": 9.987953699381496e-06, "loss": 18.5138, "step": 2815 }, { "epoch": 0.05147421719341218, "grad_norm": 8.544361818534913, "learning_rate": 9.98793315521305e-06, "loss": 19.0313, "step": 2816 }, { "epoch": 0.0514924963898587, "grad_norm": 8.774064474288327, "learning_rate": 9.987912593562332e-06, "loss": 17.9237, "step": 2817 }, { "epoch": 0.05151077558630523, "grad_norm": 7.47245073365734, "learning_rate": 9.987892014429414e-06, "loss": 17.6392, "step": 2818 }, { "epoch": 0.05152905478275175, "grad_norm": 7.947494112070558, "learning_rate": 9.987871417814365e-06, "loss": 18.1905, "step": 2819 }, { "epoch": 0.05154733397919827, "grad_norm": 7.694083457772169, "learning_rate": 9.987850803717262e-06, "loss": 17.829, "step": 2820 }, { "epoch": 0.0515656131756448, "grad_norm": 7.212210507516075, "learning_rate": 9.987830172138174e-06, "loss": 17.9155, "step": 2821 }, { "epoch": 0.05158389237209132, "grad_norm": 7.334533983864379, "learning_rate": 9.987809523077173e-06, "loss": 17.5396, "step": 2822 }, { "epoch": 0.051602171568537844, "grad_norm": 8.101411659752552, "learning_rate": 9.987788856534333e-06, "loss": 18.021, "step": 2823 }, { "epoch": 0.05162045076498437, "grad_norm": 6.834266760261926, "learning_rate": 9.987768172509727e-06, "loss": 17.5892, "step": 2824 }, { "epoch": 0.051638729961430894, "grad_norm": 7.733353573907947, "learning_rate": 9.987747471003424e-06, "loss": 18.3115, "step": 2825 }, { "epoch": 0.051657009157877416, "grad_norm": 7.736141934885637, "learning_rate": 9.9877267520155e-06, "loss": 18.0004, "step": 2826 }, { "epoch": 0.051675288354323945, "grad_norm": 7.272801249753164, "learning_rate": 9.987706015546027e-06, "loss": 17.8209, "step": 2827 }, { "epoch": 0.05169356755077047, "grad_norm": 6.3552622795233145, "learning_rate": 9.987685261595077e-06, "loss": 17.6643, "step": 2828 }, { "epoch": 0.051711846747216995, "grad_norm": 7.530113854779712, "learning_rate": 9.987664490162722e-06, "loss": 18.1216, "step": 2829 }, { "epoch": 0.05173012594366352, "grad_norm": 6.3255561749029425, "learning_rate": 9.987643701249038e-06, "loss": 17.3771, "step": 2830 }, { "epoch": 0.05174840514011004, "grad_norm": 7.601770793981465, "learning_rate": 9.987622894854094e-06, "loss": 17.9845, "step": 2831 }, { "epoch": 0.05176668433655657, "grad_norm": 7.655184473142678, "learning_rate": 9.987602070977966e-06, "loss": 17.804, "step": 2832 }, { "epoch": 0.05178496353300309, "grad_norm": 6.22552614974228, "learning_rate": 9.987581229620724e-06, "loss": 17.4074, "step": 2833 }, { "epoch": 0.05180324272944961, "grad_norm": 8.24580283423378, "learning_rate": 9.987560370782443e-06, "loss": 18.3363, "step": 2834 }, { "epoch": 0.05182152192589614, "grad_norm": 8.827466730094583, "learning_rate": 9.987539494463197e-06, "loss": 18.5687, "step": 2835 }, { "epoch": 0.05183980112234266, "grad_norm": 8.22822456444839, "learning_rate": 9.987518600663055e-06, "loss": 18.0483, "step": 2836 }, { "epoch": 0.05185808031878918, "grad_norm": 7.6572746032820636, "learning_rate": 9.987497689382095e-06, "loss": 17.8647, "step": 2837 }, { "epoch": 0.05187635951523571, "grad_norm": 7.507526556204351, "learning_rate": 9.987476760620389e-06, "loss": 17.7855, "step": 2838 }, { "epoch": 0.051894638711682234, "grad_norm": 8.800249578147918, "learning_rate": 9.987455814378008e-06, "loss": 18.3258, "step": 2839 }, { "epoch": 0.051912917908128756, "grad_norm": 8.583545120493428, "learning_rate": 9.98743485065503e-06, "loss": 18.4643, "step": 2840 }, { "epoch": 0.051931197104575284, "grad_norm": 8.589006782220283, "learning_rate": 9.987413869451522e-06, "loss": 18.3036, "step": 2841 }, { "epoch": 0.051949476301021806, "grad_norm": 8.02098650833697, "learning_rate": 9.987392870767563e-06, "loss": 18.0703, "step": 2842 }, { "epoch": 0.05196775549746833, "grad_norm": 8.222004790638712, "learning_rate": 9.987371854603226e-06, "loss": 18.2864, "step": 2843 }, { "epoch": 0.05198603469391486, "grad_norm": 6.197485706489133, "learning_rate": 9.987350820958581e-06, "loss": 17.4297, "step": 2844 }, { "epoch": 0.05200431389036138, "grad_norm": 7.830326125569888, "learning_rate": 9.987329769833705e-06, "loss": 18.1324, "step": 2845 }, { "epoch": 0.05202259308680791, "grad_norm": 8.706779415610775, "learning_rate": 9.987308701228672e-06, "loss": 18.2751, "step": 2846 }, { "epoch": 0.05204087228325443, "grad_norm": 6.159749869858416, "learning_rate": 9.987287615143554e-06, "loss": 17.5055, "step": 2847 }, { "epoch": 0.05205915147970095, "grad_norm": 8.608441703583498, "learning_rate": 9.987266511578425e-06, "loss": 18.2668, "step": 2848 }, { "epoch": 0.05207743067614748, "grad_norm": 8.791378746879808, "learning_rate": 9.98724539053336e-06, "loss": 18.3234, "step": 2849 }, { "epoch": 0.052095709872594, "grad_norm": 7.175952125316248, "learning_rate": 9.987224252008433e-06, "loss": 17.918, "step": 2850 }, { "epoch": 0.05211398906904052, "grad_norm": 8.17411494576387, "learning_rate": 9.987203096003719e-06, "loss": 18.7266, "step": 2851 }, { "epoch": 0.05213226826548705, "grad_norm": 7.924278562250412, "learning_rate": 9.98718192251929e-06, "loss": 18.2808, "step": 2852 }, { "epoch": 0.05215054746193357, "grad_norm": 7.0300478896758225, "learning_rate": 9.98716073155522e-06, "loss": 17.9663, "step": 2853 }, { "epoch": 0.052168826658380095, "grad_norm": 8.450063164139488, "learning_rate": 9.987139523111585e-06, "loss": 18.3254, "step": 2854 }, { "epoch": 0.052187105854826624, "grad_norm": 8.215549204227752, "learning_rate": 9.98711829718846e-06, "loss": 18.232, "step": 2855 }, { "epoch": 0.052205385051273145, "grad_norm": 7.8183822272817505, "learning_rate": 9.987097053785918e-06, "loss": 18.2791, "step": 2856 }, { "epoch": 0.05222366424771967, "grad_norm": 8.670558646100888, "learning_rate": 9.987075792904031e-06, "loss": 18.415, "step": 2857 }, { "epoch": 0.052241943444166196, "grad_norm": 8.412970962496319, "learning_rate": 9.987054514542878e-06, "loss": 18.25, "step": 2858 }, { "epoch": 0.05226022264061272, "grad_norm": 6.930987927455506, "learning_rate": 9.987033218702531e-06, "loss": 17.5139, "step": 2859 }, { "epoch": 0.05227850183705924, "grad_norm": 7.58864030339164, "learning_rate": 9.987011905383066e-06, "loss": 17.9744, "step": 2860 }, { "epoch": 0.05229678103350577, "grad_norm": 10.258257176125628, "learning_rate": 9.986990574584556e-06, "loss": 18.803, "step": 2861 }, { "epoch": 0.05231506022995229, "grad_norm": 7.413678639752427, "learning_rate": 9.986969226307078e-06, "loss": 17.7989, "step": 2862 }, { "epoch": 0.05233333942639882, "grad_norm": 7.445005414972241, "learning_rate": 9.986947860550705e-06, "loss": 17.9625, "step": 2863 }, { "epoch": 0.05235161862284534, "grad_norm": 7.4133344111490915, "learning_rate": 9.986926477315512e-06, "loss": 17.7285, "step": 2864 }, { "epoch": 0.05236989781929186, "grad_norm": 7.1495490687528624, "learning_rate": 9.986905076601577e-06, "loss": 17.9919, "step": 2865 }, { "epoch": 0.05238817701573839, "grad_norm": 7.945602880664831, "learning_rate": 9.98688365840897e-06, "loss": 18.034, "step": 2866 }, { "epoch": 0.05240645621218491, "grad_norm": 9.226799939576006, "learning_rate": 9.98686222273777e-06, "loss": 18.2153, "step": 2867 }, { "epoch": 0.052424735408631434, "grad_norm": 7.4125133452789544, "learning_rate": 9.986840769588049e-06, "loss": 17.8405, "step": 2868 }, { "epoch": 0.05244301460507796, "grad_norm": 8.488537991500818, "learning_rate": 9.986819298959885e-06, "loss": 18.0606, "step": 2869 }, { "epoch": 0.052461293801524485, "grad_norm": 6.820147326899155, "learning_rate": 9.986797810853352e-06, "loss": 17.4996, "step": 2870 }, { "epoch": 0.05247957299797101, "grad_norm": 5.829696315380732, "learning_rate": 9.986776305268524e-06, "loss": 17.2355, "step": 2871 }, { "epoch": 0.052497852194417535, "grad_norm": 6.216952987512384, "learning_rate": 9.986754782205479e-06, "loss": 17.3076, "step": 2872 }, { "epoch": 0.05251613139086406, "grad_norm": 7.184225084460174, "learning_rate": 9.986733241664291e-06, "loss": 17.6507, "step": 2873 }, { "epoch": 0.05253441058731058, "grad_norm": 7.666798363507448, "learning_rate": 9.986711683645036e-06, "loss": 17.8974, "step": 2874 }, { "epoch": 0.05255268978375711, "grad_norm": 8.129386680646322, "learning_rate": 9.98669010814779e-06, "loss": 18.1159, "step": 2875 }, { "epoch": 0.05257096898020363, "grad_norm": 8.878008739092836, "learning_rate": 9.986668515172626e-06, "loss": 18.3956, "step": 2876 }, { "epoch": 0.05258924817665015, "grad_norm": 8.609838043081584, "learning_rate": 9.986646904719624e-06, "loss": 17.837, "step": 2877 }, { "epoch": 0.05260752737309668, "grad_norm": 8.710736010767981, "learning_rate": 9.986625276788855e-06, "loss": 18.8199, "step": 2878 }, { "epoch": 0.0526258065695432, "grad_norm": 7.748052330501188, "learning_rate": 9.986603631380399e-06, "loss": 17.93, "step": 2879 }, { "epoch": 0.05264408576598973, "grad_norm": 7.966771545840098, "learning_rate": 9.986581968494328e-06, "loss": 18.3258, "step": 2880 }, { "epoch": 0.05266236496243625, "grad_norm": 7.6868080308718785, "learning_rate": 9.98656028813072e-06, "loss": 18.0548, "step": 2881 }, { "epoch": 0.052680644158882774, "grad_norm": 7.655713983096808, "learning_rate": 9.986538590289654e-06, "loss": 18.0637, "step": 2882 }, { "epoch": 0.0526989233553293, "grad_norm": 7.5684529254383595, "learning_rate": 9.9865168749712e-06, "loss": 17.9218, "step": 2883 }, { "epoch": 0.052717202551775824, "grad_norm": 7.6454991418926905, "learning_rate": 9.986495142175437e-06, "loss": 17.786, "step": 2884 }, { "epoch": 0.052735481748222346, "grad_norm": 7.733469324401486, "learning_rate": 9.986473391902442e-06, "loss": 18.0281, "step": 2885 }, { "epoch": 0.052753760944668875, "grad_norm": 7.64452382839804, "learning_rate": 9.986451624152291e-06, "loss": 17.9382, "step": 2886 }, { "epoch": 0.052772040141115396, "grad_norm": 8.626678024177481, "learning_rate": 9.986429838925059e-06, "loss": 18.3031, "step": 2887 }, { "epoch": 0.05279031933756192, "grad_norm": 7.30641898232523, "learning_rate": 9.986408036220825e-06, "loss": 17.8578, "step": 2888 }, { "epoch": 0.05280859853400845, "grad_norm": 8.222903766705286, "learning_rate": 9.986386216039661e-06, "loss": 18.5021, "step": 2889 }, { "epoch": 0.05282687773045497, "grad_norm": 6.821592759931198, "learning_rate": 9.986364378381647e-06, "loss": 17.5543, "step": 2890 }, { "epoch": 0.05284515692690149, "grad_norm": 5.638335855246105, "learning_rate": 9.986342523246857e-06, "loss": 17.3, "step": 2891 }, { "epoch": 0.05286343612334802, "grad_norm": 6.637079075331286, "learning_rate": 9.986320650635371e-06, "loss": 17.6331, "step": 2892 }, { "epoch": 0.05288171531979454, "grad_norm": 8.285688386237048, "learning_rate": 9.986298760547264e-06, "loss": 18.2569, "step": 2893 }, { "epoch": 0.05289999451624106, "grad_norm": 7.658564001180271, "learning_rate": 9.986276852982612e-06, "loss": 17.7906, "step": 2894 }, { "epoch": 0.05291827371268759, "grad_norm": 9.256670358946646, "learning_rate": 9.986254927941492e-06, "loss": 18.7537, "step": 2895 }, { "epoch": 0.05293655290913411, "grad_norm": 8.027355404343318, "learning_rate": 9.986232985423982e-06, "loss": 18.3691, "step": 2896 }, { "epoch": 0.05295483210558064, "grad_norm": 7.654962863757842, "learning_rate": 9.986211025430156e-06, "loss": 17.8887, "step": 2897 }, { "epoch": 0.052973111302027164, "grad_norm": 8.15685620262823, "learning_rate": 9.986189047960096e-06, "loss": 18.5433, "step": 2898 }, { "epoch": 0.052991390498473685, "grad_norm": 11.409212729323968, "learning_rate": 9.986167053013875e-06, "loss": 18.1699, "step": 2899 }, { "epoch": 0.053009669694920214, "grad_norm": 7.302661018768685, "learning_rate": 9.986145040591571e-06, "loss": 17.9199, "step": 2900 }, { "epoch": 0.053027948891366736, "grad_norm": 8.143621464920205, "learning_rate": 9.986123010693261e-06, "loss": 18.4157, "step": 2901 }, { "epoch": 0.05304622808781326, "grad_norm": 6.974448154848328, "learning_rate": 9.986100963319023e-06, "loss": 17.4241, "step": 2902 }, { "epoch": 0.053064507284259786, "grad_norm": 9.076493067839538, "learning_rate": 9.986078898468934e-06, "loss": 18.4846, "step": 2903 }, { "epoch": 0.05308278648070631, "grad_norm": 8.606778338151203, "learning_rate": 9.98605681614307e-06, "loss": 18.4125, "step": 2904 }, { "epoch": 0.05310106567715283, "grad_norm": 9.238450490429171, "learning_rate": 9.98603471634151e-06, "loss": 18.6629, "step": 2905 }, { "epoch": 0.05311934487359936, "grad_norm": 7.476422017962867, "learning_rate": 9.986012599064332e-06, "loss": 17.6207, "step": 2906 }, { "epoch": 0.05313762407004588, "grad_norm": 10.354709407007224, "learning_rate": 9.98599046431161e-06, "loss": 19.1014, "step": 2907 }, { "epoch": 0.0531559032664924, "grad_norm": 7.589139360560411, "learning_rate": 9.985968312083428e-06, "loss": 17.8751, "step": 2908 }, { "epoch": 0.05317418246293893, "grad_norm": 7.32388149339212, "learning_rate": 9.985946142379856e-06, "loss": 17.7151, "step": 2909 }, { "epoch": 0.05319246165938545, "grad_norm": 9.125049790803184, "learning_rate": 9.985923955200977e-06, "loss": 18.2955, "step": 2910 }, { "epoch": 0.053210740855831974, "grad_norm": 7.4579332937791945, "learning_rate": 9.985901750546867e-06, "loss": 17.7339, "step": 2911 }, { "epoch": 0.0532290200522785, "grad_norm": 8.629397393390434, "learning_rate": 9.985879528417604e-06, "loss": 18.2328, "step": 2912 }, { "epoch": 0.053247299248725025, "grad_norm": 8.475233804732257, "learning_rate": 9.985857288813266e-06, "loss": 17.9082, "step": 2913 }, { "epoch": 0.05326557844517155, "grad_norm": 7.094067999462129, "learning_rate": 9.985835031733931e-06, "loss": 17.638, "step": 2914 }, { "epoch": 0.053283857641618075, "grad_norm": 7.467000267979098, "learning_rate": 9.985812757179677e-06, "loss": 17.728, "step": 2915 }, { "epoch": 0.0533021368380646, "grad_norm": 8.54598987332618, "learning_rate": 9.98579046515058e-06, "loss": 18.105, "step": 2916 }, { "epoch": 0.053320416034511126, "grad_norm": 7.344890427722456, "learning_rate": 9.985768155646721e-06, "loss": 17.7487, "step": 2917 }, { "epoch": 0.05333869523095765, "grad_norm": 6.628978628649656, "learning_rate": 9.98574582866818e-06, "loss": 17.7106, "step": 2918 }, { "epoch": 0.05335697442740417, "grad_norm": 8.360264382365479, "learning_rate": 9.98572348421503e-06, "loss": 17.9406, "step": 2919 }, { "epoch": 0.0533752536238507, "grad_norm": 6.455080799676221, "learning_rate": 9.985701122287352e-06, "loss": 17.3594, "step": 2920 }, { "epoch": 0.05339353282029722, "grad_norm": 9.13581655350652, "learning_rate": 9.985678742885225e-06, "loss": 18.6982, "step": 2921 }, { "epoch": 0.05341181201674374, "grad_norm": 9.032337151984729, "learning_rate": 9.985656346008727e-06, "loss": 18.4182, "step": 2922 }, { "epoch": 0.05343009121319027, "grad_norm": 8.318073385313161, "learning_rate": 9.985633931657934e-06, "loss": 18.0261, "step": 2923 }, { "epoch": 0.05344837040963679, "grad_norm": 7.455914868336505, "learning_rate": 9.985611499832929e-06, "loss": 17.9919, "step": 2924 }, { "epoch": 0.053466649606083314, "grad_norm": 7.73762423130492, "learning_rate": 9.985589050533787e-06, "loss": 17.6016, "step": 2925 }, { "epoch": 0.05348492880252984, "grad_norm": 7.469676296186834, "learning_rate": 9.98556658376059e-06, "loss": 18.2103, "step": 2926 }, { "epoch": 0.053503207998976364, "grad_norm": 9.74712854445016, "learning_rate": 9.985544099513412e-06, "loss": 18.6005, "step": 2927 }, { "epoch": 0.053521487195422886, "grad_norm": 8.990621077448539, "learning_rate": 9.985521597792336e-06, "loss": 18.4498, "step": 2928 }, { "epoch": 0.053539766391869414, "grad_norm": 7.56040073408485, "learning_rate": 9.985499078597438e-06, "loss": 17.9899, "step": 2929 }, { "epoch": 0.053558045588315936, "grad_norm": 7.9332027847347515, "learning_rate": 9.9854765419288e-06, "loss": 18.2143, "step": 2930 }, { "epoch": 0.053576324784762465, "grad_norm": 8.469629512471046, "learning_rate": 9.985453987786498e-06, "loss": 18.0529, "step": 2931 }, { "epoch": 0.05359460398120899, "grad_norm": 7.188217590980575, "learning_rate": 9.985431416170615e-06, "loss": 17.7905, "step": 2932 }, { "epoch": 0.05361288317765551, "grad_norm": 7.1093033521734235, "learning_rate": 9.985408827081225e-06, "loss": 17.8705, "step": 2933 }, { "epoch": 0.05363116237410204, "grad_norm": 7.80175291704277, "learning_rate": 9.98538622051841e-06, "loss": 18.183, "step": 2934 }, { "epoch": 0.05364944157054856, "grad_norm": 6.68863290206066, "learning_rate": 9.98536359648225e-06, "loss": 17.4041, "step": 2935 }, { "epoch": 0.05366772076699508, "grad_norm": 9.240501255887352, "learning_rate": 9.985340954972822e-06, "loss": 18.7026, "step": 2936 }, { "epoch": 0.05368599996344161, "grad_norm": 6.466188007149112, "learning_rate": 9.985318295990207e-06, "loss": 17.3034, "step": 2937 }, { "epoch": 0.05370427915988813, "grad_norm": 7.41776976246194, "learning_rate": 9.985295619534482e-06, "loss": 17.8361, "step": 2938 }, { "epoch": 0.05372255835633465, "grad_norm": 7.012147727360407, "learning_rate": 9.985272925605732e-06, "loss": 17.793, "step": 2939 }, { "epoch": 0.05374083755278118, "grad_norm": 7.991114266886512, "learning_rate": 9.985250214204032e-06, "loss": 18.0574, "step": 2940 }, { "epoch": 0.0537591167492277, "grad_norm": 9.579076946959471, "learning_rate": 9.985227485329461e-06, "loss": 18.7311, "step": 2941 }, { "epoch": 0.053777395945674225, "grad_norm": 7.725627071464176, "learning_rate": 9.985204738982102e-06, "loss": 17.8584, "step": 2942 }, { "epoch": 0.053795675142120754, "grad_norm": 8.69365900468432, "learning_rate": 9.985181975162032e-06, "loss": 18.2724, "step": 2943 }, { "epoch": 0.053813954338567276, "grad_norm": 9.613125557602531, "learning_rate": 9.985159193869333e-06, "loss": 19.1958, "step": 2944 }, { "epoch": 0.053832233535013804, "grad_norm": 6.2900735210417436, "learning_rate": 9.985136395104082e-06, "loss": 17.5073, "step": 2945 }, { "epoch": 0.053850512731460326, "grad_norm": 6.6392678417274755, "learning_rate": 9.98511357886636e-06, "loss": 17.6563, "step": 2946 }, { "epoch": 0.05386879192790685, "grad_norm": 7.664506189258505, "learning_rate": 9.985090745156249e-06, "loss": 17.8886, "step": 2947 }, { "epoch": 0.053887071124353376, "grad_norm": 9.27521136601304, "learning_rate": 9.985067893973828e-06, "loss": 18.4531, "step": 2948 }, { "epoch": 0.0539053503207999, "grad_norm": 10.37789530787545, "learning_rate": 9.985045025319175e-06, "loss": 18.9894, "step": 2949 }, { "epoch": 0.05392362951724642, "grad_norm": 7.78415898265666, "learning_rate": 9.985022139192372e-06, "loss": 17.8419, "step": 2950 }, { "epoch": 0.05394190871369295, "grad_norm": 7.510278415092891, "learning_rate": 9.984999235593502e-06, "loss": 17.9377, "step": 2951 }, { "epoch": 0.05396018791013947, "grad_norm": 6.68026717118842, "learning_rate": 9.984976314522638e-06, "loss": 17.4353, "step": 2952 }, { "epoch": 0.05397846710658599, "grad_norm": 7.087524709653039, "learning_rate": 9.984953375979868e-06, "loss": 17.7807, "step": 2953 }, { "epoch": 0.05399674630303252, "grad_norm": 8.411742752976526, "learning_rate": 9.984930419965266e-06, "loss": 18.2082, "step": 2954 }, { "epoch": 0.05401502549947904, "grad_norm": 7.812448934713648, "learning_rate": 9.984907446478918e-06, "loss": 18.1363, "step": 2955 }, { "epoch": 0.054033304695925564, "grad_norm": 8.044356207894898, "learning_rate": 9.9848844555209e-06, "loss": 18.2735, "step": 2956 }, { "epoch": 0.05405158389237209, "grad_norm": 7.736509632554892, "learning_rate": 9.984861447091296e-06, "loss": 18.1222, "step": 2957 }, { "epoch": 0.054069863088818615, "grad_norm": 5.933792456814279, "learning_rate": 9.984838421190184e-06, "loss": 17.1622, "step": 2958 }, { "epoch": 0.05408814228526514, "grad_norm": 7.904815188668401, "learning_rate": 9.984815377817648e-06, "loss": 17.5206, "step": 2959 }, { "epoch": 0.054106421481711665, "grad_norm": 7.295836961563647, "learning_rate": 9.984792316973765e-06, "loss": 17.8553, "step": 2960 }, { "epoch": 0.05412470067815819, "grad_norm": 8.1689220413142, "learning_rate": 9.984769238658617e-06, "loss": 18.0965, "step": 2961 }, { "epoch": 0.054142979874604716, "grad_norm": 7.841275180485553, "learning_rate": 9.984746142872287e-06, "loss": 18.3405, "step": 2962 }, { "epoch": 0.05416125907105124, "grad_norm": 6.116042868616801, "learning_rate": 9.984723029614853e-06, "loss": 17.2451, "step": 2963 }, { "epoch": 0.05417953826749776, "grad_norm": 7.649454684530826, "learning_rate": 9.984699898886397e-06, "loss": 17.9682, "step": 2964 }, { "epoch": 0.05419781746394429, "grad_norm": 8.02078081864306, "learning_rate": 9.984676750687e-06, "loss": 18.1302, "step": 2965 }, { "epoch": 0.05421609666039081, "grad_norm": 6.873682823028608, "learning_rate": 9.984653585016747e-06, "loss": 17.6275, "step": 2966 }, { "epoch": 0.05423437585683733, "grad_norm": 7.627279055780362, "learning_rate": 9.984630401875712e-06, "loss": 17.8492, "step": 2967 }, { "epoch": 0.05425265505328386, "grad_norm": 7.2484768612040185, "learning_rate": 9.984607201263983e-06, "loss": 17.9409, "step": 2968 }, { "epoch": 0.05427093424973038, "grad_norm": 8.53649524945606, "learning_rate": 9.984583983181634e-06, "loss": 18.4741, "step": 2969 }, { "epoch": 0.054289213446176904, "grad_norm": 7.404241934179543, "learning_rate": 9.984560747628755e-06, "loss": 17.9715, "step": 2970 }, { "epoch": 0.05430749264262343, "grad_norm": 7.885455730551289, "learning_rate": 9.984537494605422e-06, "loss": 18.206, "step": 2971 }, { "epoch": 0.054325771839069954, "grad_norm": 7.419783122451107, "learning_rate": 9.984514224111717e-06, "loss": 18.0498, "step": 2972 }, { "epoch": 0.054344051035516476, "grad_norm": 7.201191579877522, "learning_rate": 9.984490936147722e-06, "loss": 17.7067, "step": 2973 }, { "epoch": 0.054362330231963005, "grad_norm": 7.263656909714293, "learning_rate": 9.98446763071352e-06, "loss": 17.8044, "step": 2974 }, { "epoch": 0.054380609428409526, "grad_norm": 7.688869607832282, "learning_rate": 9.984444307809189e-06, "loss": 18.0741, "step": 2975 }, { "epoch": 0.05439888862485605, "grad_norm": 7.933950074273668, "learning_rate": 9.984420967434815e-06, "loss": 18.2206, "step": 2976 }, { "epoch": 0.05441716782130258, "grad_norm": 7.689423644462968, "learning_rate": 9.984397609590478e-06, "loss": 17.866, "step": 2977 }, { "epoch": 0.0544354470177491, "grad_norm": 6.19100681423261, "learning_rate": 9.98437423427626e-06, "loss": 17.2061, "step": 2978 }, { "epoch": 0.05445372621419563, "grad_norm": 7.220269564326452, "learning_rate": 9.984350841492243e-06, "loss": 17.8103, "step": 2979 }, { "epoch": 0.05447200541064215, "grad_norm": 7.535332315801858, "learning_rate": 9.984327431238508e-06, "loss": 17.9382, "step": 2980 }, { "epoch": 0.05449028460708867, "grad_norm": 8.61754436364827, "learning_rate": 9.984304003515137e-06, "loss": 17.9783, "step": 2981 }, { "epoch": 0.0545085638035352, "grad_norm": 6.6467237653339, "learning_rate": 9.984280558322215e-06, "loss": 17.4914, "step": 2982 }, { "epoch": 0.05452684299998172, "grad_norm": 8.041593626098898, "learning_rate": 9.984257095659821e-06, "loss": 17.8183, "step": 2983 }, { "epoch": 0.05454512219642824, "grad_norm": 8.110138118165157, "learning_rate": 9.98423361552804e-06, "loss": 17.7319, "step": 2984 }, { "epoch": 0.05456340139287477, "grad_norm": 6.615120623978179, "learning_rate": 9.984210117926952e-06, "loss": 17.3877, "step": 2985 }, { "epoch": 0.054581680589321294, "grad_norm": 7.320762110935072, "learning_rate": 9.984186602856639e-06, "loss": 17.9738, "step": 2986 }, { "epoch": 0.054599959785767815, "grad_norm": 6.925733595848153, "learning_rate": 9.984163070317186e-06, "loss": 17.7418, "step": 2987 }, { "epoch": 0.054618238982214344, "grad_norm": 7.202411637507689, "learning_rate": 9.984139520308672e-06, "loss": 18.0995, "step": 2988 }, { "epoch": 0.054636518178660866, "grad_norm": 7.164653278773729, "learning_rate": 9.984115952831182e-06, "loss": 17.8108, "step": 2989 }, { "epoch": 0.05465479737510739, "grad_norm": 8.897122619932148, "learning_rate": 9.9840923678848e-06, "loss": 18.1692, "step": 2990 }, { "epoch": 0.054673076571553916, "grad_norm": 7.030115125998609, "learning_rate": 9.984068765469603e-06, "loss": 17.8911, "step": 2991 }, { "epoch": 0.05469135576800044, "grad_norm": 7.308291531501065, "learning_rate": 9.984045145585681e-06, "loss": 17.8525, "step": 2992 }, { "epoch": 0.05470963496444696, "grad_norm": 9.113822979077941, "learning_rate": 9.984021508233111e-06, "loss": 18.4587, "step": 2993 }, { "epoch": 0.05472791416089349, "grad_norm": 7.429441521159165, "learning_rate": 9.98399785341198e-06, "loss": 17.9046, "step": 2994 }, { "epoch": 0.05474619335734001, "grad_norm": 6.609227078282274, "learning_rate": 9.983974181122368e-06, "loss": 17.3922, "step": 2995 }, { "epoch": 0.05476447255378654, "grad_norm": 7.506487250012753, "learning_rate": 9.98395049136436e-06, "loss": 17.6925, "step": 2996 }, { "epoch": 0.05478275175023306, "grad_norm": 7.4518922699203864, "learning_rate": 9.983926784138036e-06, "loss": 17.8667, "step": 2997 }, { "epoch": 0.05480103094667958, "grad_norm": 8.356611829728001, "learning_rate": 9.983903059443482e-06, "loss": 18.1327, "step": 2998 }, { "epoch": 0.05481931014312611, "grad_norm": 8.82366343581557, "learning_rate": 9.98387931728078e-06, "loss": 18.3259, "step": 2999 }, { "epoch": 0.05483758933957263, "grad_norm": 8.375593874272335, "learning_rate": 9.983855557650015e-06, "loss": 18.6367, "step": 3000 }, { "epoch": 0.054855868536019155, "grad_norm": 6.387070611131344, "learning_rate": 9.983831780551268e-06, "loss": 17.3594, "step": 3001 }, { "epoch": 0.05487414773246568, "grad_norm": 7.948890739914074, "learning_rate": 9.983807985984621e-06, "loss": 18.0505, "step": 3002 }, { "epoch": 0.054892426928912205, "grad_norm": 7.114125273937795, "learning_rate": 9.983784173950163e-06, "loss": 17.6408, "step": 3003 }, { "epoch": 0.05491070612535873, "grad_norm": 7.006302790073139, "learning_rate": 9.983760344447972e-06, "loss": 17.6835, "step": 3004 }, { "epoch": 0.054928985321805256, "grad_norm": 7.28263684589425, "learning_rate": 9.983736497478134e-06, "loss": 17.6965, "step": 3005 }, { "epoch": 0.05494726451825178, "grad_norm": 7.751363796915561, "learning_rate": 9.983712633040732e-06, "loss": 17.8543, "step": 3006 }, { "epoch": 0.0549655437146983, "grad_norm": 8.07461854526666, "learning_rate": 9.983688751135849e-06, "loss": 18.1354, "step": 3007 }, { "epoch": 0.05498382291114483, "grad_norm": 8.218066766385554, "learning_rate": 9.983664851763571e-06, "loss": 18.3705, "step": 3008 }, { "epoch": 0.05500210210759135, "grad_norm": 8.192671558395386, "learning_rate": 9.983640934923977e-06, "loss": 18.5413, "step": 3009 }, { "epoch": 0.05502038130403787, "grad_norm": 7.698426913627192, "learning_rate": 9.983617000617157e-06, "loss": 17.7436, "step": 3010 }, { "epoch": 0.0550386605004844, "grad_norm": 7.305591498734186, "learning_rate": 9.98359304884319e-06, "loss": 17.8984, "step": 3011 }, { "epoch": 0.05505693969693092, "grad_norm": 7.462744730812517, "learning_rate": 9.983569079602163e-06, "loss": 17.8616, "step": 3012 }, { "epoch": 0.05507521889337745, "grad_norm": 9.204157449070113, "learning_rate": 9.983545092894158e-06, "loss": 18.6582, "step": 3013 }, { "epoch": 0.05509349808982397, "grad_norm": 6.781635136989092, "learning_rate": 9.983521088719262e-06, "loss": 17.5635, "step": 3014 }, { "epoch": 0.055111777286270494, "grad_norm": 7.529536812907106, "learning_rate": 9.983497067077554e-06, "loss": 17.8964, "step": 3015 }, { "epoch": 0.05513005648271702, "grad_norm": 8.508994685012931, "learning_rate": 9.983473027969122e-06, "loss": 18.6022, "step": 3016 }, { "epoch": 0.055148335679163545, "grad_norm": 7.681556406311905, "learning_rate": 9.983448971394051e-06, "loss": 17.9823, "step": 3017 }, { "epoch": 0.055166614875610066, "grad_norm": 7.653760825345166, "learning_rate": 9.983424897352422e-06, "loss": 18.1274, "step": 3018 }, { "epoch": 0.055184894072056595, "grad_norm": 7.305335009265789, "learning_rate": 9.983400805844324e-06, "loss": 18.0893, "step": 3019 }, { "epoch": 0.05520317326850312, "grad_norm": 5.745154385526005, "learning_rate": 9.983376696869836e-06, "loss": 17.3378, "step": 3020 }, { "epoch": 0.05522145246494964, "grad_norm": 6.646640837725589, "learning_rate": 9.983352570429046e-06, "loss": 17.5679, "step": 3021 }, { "epoch": 0.05523973166139617, "grad_norm": 7.178155182805772, "learning_rate": 9.983328426522036e-06, "loss": 17.7329, "step": 3022 }, { "epoch": 0.05525801085784269, "grad_norm": 8.229139627854392, "learning_rate": 9.983304265148894e-06, "loss": 18.2517, "step": 3023 }, { "epoch": 0.05527629005428921, "grad_norm": 7.105184523923117, "learning_rate": 9.983280086309703e-06, "loss": 17.9693, "step": 3024 }, { "epoch": 0.05529456925073574, "grad_norm": 7.504820287503861, "learning_rate": 9.983255890004548e-06, "loss": 18.0733, "step": 3025 }, { "epoch": 0.05531284844718226, "grad_norm": 8.121601282141208, "learning_rate": 9.983231676233513e-06, "loss": 18.4775, "step": 3026 }, { "epoch": 0.05533112764362878, "grad_norm": 7.522589727206308, "learning_rate": 9.983207444996682e-06, "loss": 17.9586, "step": 3027 }, { "epoch": 0.05534940684007531, "grad_norm": 8.58402787228452, "learning_rate": 9.983183196294144e-06, "loss": 18.5355, "step": 3028 }, { "epoch": 0.05536768603652183, "grad_norm": 6.920000719747553, "learning_rate": 9.98315893012598e-06, "loss": 17.6501, "step": 3029 }, { "epoch": 0.05538596523296836, "grad_norm": 6.34494087197748, "learning_rate": 9.983134646492277e-06, "loss": 17.4743, "step": 3030 }, { "epoch": 0.055404244429414884, "grad_norm": 9.061419916247212, "learning_rate": 9.983110345393119e-06, "loss": 18.9192, "step": 3031 }, { "epoch": 0.055422523625861406, "grad_norm": 7.2914007617061145, "learning_rate": 9.983086026828592e-06, "loss": 17.5493, "step": 3032 }, { "epoch": 0.055440802822307934, "grad_norm": 7.286057303199407, "learning_rate": 9.983061690798782e-06, "loss": 17.8788, "step": 3033 }, { "epoch": 0.055459082018754456, "grad_norm": 7.314665473423591, "learning_rate": 9.98303733730377e-06, "loss": 18.1814, "step": 3034 }, { "epoch": 0.05547736121520098, "grad_norm": 8.666248371560606, "learning_rate": 9.983012966343645e-06, "loss": 18.5522, "step": 3035 }, { "epoch": 0.05549564041164751, "grad_norm": 6.389428773229833, "learning_rate": 9.982988577918494e-06, "loss": 17.7468, "step": 3036 }, { "epoch": 0.05551391960809403, "grad_norm": 6.422842320255038, "learning_rate": 9.9829641720284e-06, "loss": 17.5976, "step": 3037 }, { "epoch": 0.05553219880454055, "grad_norm": 8.907083855297007, "learning_rate": 9.982939748673447e-06, "loss": 18.4818, "step": 3038 }, { "epoch": 0.05555047800098708, "grad_norm": 6.595998114701068, "learning_rate": 9.982915307853725e-06, "loss": 17.5303, "step": 3039 }, { "epoch": 0.0555687571974336, "grad_norm": 8.342567757686064, "learning_rate": 9.982890849569318e-06, "loss": 18.3321, "step": 3040 }, { "epoch": 0.05558703639388012, "grad_norm": 7.3346381271810825, "learning_rate": 9.982866373820308e-06, "loss": 17.9998, "step": 3041 }, { "epoch": 0.05560531559032665, "grad_norm": 8.02451992770075, "learning_rate": 9.982841880606786e-06, "loss": 17.886, "step": 3042 }, { "epoch": 0.05562359478677317, "grad_norm": 6.86313206794148, "learning_rate": 9.982817369928834e-06, "loss": 17.8138, "step": 3043 }, { "epoch": 0.055641873983219695, "grad_norm": 7.792835334586353, "learning_rate": 9.98279284178654e-06, "loss": 18.1808, "step": 3044 }, { "epoch": 0.05566015317966622, "grad_norm": 8.365750307803037, "learning_rate": 9.982768296179989e-06, "loss": 18.1171, "step": 3045 }, { "epoch": 0.055678432376112745, "grad_norm": 9.19430205300137, "learning_rate": 9.98274373310927e-06, "loss": 18.6058, "step": 3046 }, { "epoch": 0.055696711572559274, "grad_norm": 9.578064016682275, "learning_rate": 9.982719152574465e-06, "loss": 18.844, "step": 3047 }, { "epoch": 0.055714990769005796, "grad_norm": 7.766110325583367, "learning_rate": 9.982694554575661e-06, "loss": 18.0555, "step": 3048 }, { "epoch": 0.05573326996545232, "grad_norm": 8.788885331065469, "learning_rate": 9.982669939112946e-06, "loss": 18.2255, "step": 3049 }, { "epoch": 0.055751549161898846, "grad_norm": 6.953589933728604, "learning_rate": 9.982645306186405e-06, "loss": 17.6454, "step": 3050 }, { "epoch": 0.05576982835834537, "grad_norm": 8.833252919081332, "learning_rate": 9.982620655796123e-06, "loss": 18.5215, "step": 3051 }, { "epoch": 0.05578810755479189, "grad_norm": 7.84034395593779, "learning_rate": 9.98259598794219e-06, "loss": 18.1439, "step": 3052 }, { "epoch": 0.05580638675123842, "grad_norm": 6.8936031465011185, "learning_rate": 9.982571302624691e-06, "loss": 17.6117, "step": 3053 }, { "epoch": 0.05582466594768494, "grad_norm": 7.101308281568558, "learning_rate": 9.982546599843709e-06, "loss": 17.7474, "step": 3054 }, { "epoch": 0.05584294514413146, "grad_norm": 9.115871047523548, "learning_rate": 9.982521879599337e-06, "loss": 18.3247, "step": 3055 }, { "epoch": 0.05586122434057799, "grad_norm": 7.753280953249115, "learning_rate": 9.982497141891656e-06, "loss": 17.8628, "step": 3056 }, { "epoch": 0.05587950353702451, "grad_norm": 8.13964987987562, "learning_rate": 9.982472386720754e-06, "loss": 18.2396, "step": 3057 }, { "epoch": 0.055897782733471034, "grad_norm": 7.772282244173961, "learning_rate": 9.982447614086721e-06, "loss": 18.3326, "step": 3058 }, { "epoch": 0.05591606192991756, "grad_norm": 8.526465003179315, "learning_rate": 9.98242282398964e-06, "loss": 18.188, "step": 3059 }, { "epoch": 0.055934341126364084, "grad_norm": 6.492358861107072, "learning_rate": 9.982398016429599e-06, "loss": 17.7256, "step": 3060 }, { "epoch": 0.055952620322810606, "grad_norm": 7.825082106363907, "learning_rate": 9.982373191406687e-06, "loss": 17.8356, "step": 3061 }, { "epoch": 0.055970899519257135, "grad_norm": 7.567674437585254, "learning_rate": 9.982348348920988e-06, "loss": 17.7985, "step": 3062 }, { "epoch": 0.05598917871570366, "grad_norm": 7.691814573161281, "learning_rate": 9.982323488972592e-06, "loss": 18.0445, "step": 3063 }, { "epoch": 0.056007457912150185, "grad_norm": 8.31240527788603, "learning_rate": 9.982298611561583e-06, "loss": 18.3168, "step": 3064 }, { "epoch": 0.05602573710859671, "grad_norm": 9.123638086422385, "learning_rate": 9.982273716688048e-06, "loss": 18.5168, "step": 3065 }, { "epoch": 0.05604401630504323, "grad_norm": 7.429816320145203, "learning_rate": 9.982248804352079e-06, "loss": 17.961, "step": 3066 }, { "epoch": 0.05606229550148976, "grad_norm": 7.028700829005101, "learning_rate": 9.98222387455376e-06, "loss": 17.6662, "step": 3067 }, { "epoch": 0.05608057469793628, "grad_norm": 7.397826548373838, "learning_rate": 9.982198927293177e-06, "loss": 17.9261, "step": 3068 }, { "epoch": 0.0560988538943828, "grad_norm": 7.661716198505955, "learning_rate": 9.98217396257042e-06, "loss": 17.9355, "step": 3069 }, { "epoch": 0.05611713309082933, "grad_norm": 7.724760272102188, "learning_rate": 9.982148980385576e-06, "loss": 18.0717, "step": 3070 }, { "epoch": 0.05613541228727585, "grad_norm": 6.8711206059020995, "learning_rate": 9.982123980738731e-06, "loss": 17.8262, "step": 3071 }, { "epoch": 0.05615369148372237, "grad_norm": 8.514880584015112, "learning_rate": 9.982098963629975e-06, "loss": 18.3857, "step": 3072 }, { "epoch": 0.0561719706801689, "grad_norm": 6.537398221025665, "learning_rate": 9.982073929059394e-06, "loss": 17.4544, "step": 3073 }, { "epoch": 0.056190249876615424, "grad_norm": 7.224619991155388, "learning_rate": 9.982048877027077e-06, "loss": 17.7508, "step": 3074 }, { "epoch": 0.056208529073061946, "grad_norm": 8.93773857365674, "learning_rate": 9.98202380753311e-06, "loss": 18.9491, "step": 3075 }, { "epoch": 0.056226808269508474, "grad_norm": 7.119907582320052, "learning_rate": 9.98199872057758e-06, "loss": 17.6432, "step": 3076 }, { "epoch": 0.056245087465954996, "grad_norm": 7.169995182665816, "learning_rate": 9.98197361616058e-06, "loss": 17.4187, "step": 3077 }, { "epoch": 0.05626336666240152, "grad_norm": 7.795123421903371, "learning_rate": 9.981948494282195e-06, "loss": 18.0035, "step": 3078 }, { "epoch": 0.056281645858848046, "grad_norm": 7.726347244191609, "learning_rate": 9.98192335494251e-06, "loss": 18.0938, "step": 3079 }, { "epoch": 0.05629992505529457, "grad_norm": 6.6654710399569606, "learning_rate": 9.98189819814162e-06, "loss": 17.3324, "step": 3080 }, { "epoch": 0.0563182042517411, "grad_norm": 8.028342189665967, "learning_rate": 9.981873023879605e-06, "loss": 17.6997, "step": 3081 }, { "epoch": 0.05633648344818762, "grad_norm": 7.351889325695132, "learning_rate": 9.981847832156559e-06, "loss": 17.9691, "step": 3082 }, { "epoch": 0.05635476264463414, "grad_norm": 6.877054372041403, "learning_rate": 9.981822622972568e-06, "loss": 17.6384, "step": 3083 }, { "epoch": 0.05637304184108067, "grad_norm": 6.459901030651059, "learning_rate": 9.981797396327722e-06, "loss": 17.3577, "step": 3084 }, { "epoch": 0.05639132103752719, "grad_norm": 7.909241642474109, "learning_rate": 9.981772152222109e-06, "loss": 18.504, "step": 3085 }, { "epoch": 0.05640960023397371, "grad_norm": 7.433748334982519, "learning_rate": 9.981746890655815e-06, "loss": 17.7713, "step": 3086 }, { "epoch": 0.05642787943042024, "grad_norm": 6.659830404372923, "learning_rate": 9.981721611628932e-06, "loss": 17.4545, "step": 3087 }, { "epoch": 0.05644615862686676, "grad_norm": 7.765799042403962, "learning_rate": 9.981696315141546e-06, "loss": 17.8737, "step": 3088 }, { "epoch": 0.056464437823313285, "grad_norm": 8.867682423038241, "learning_rate": 9.981671001193748e-06, "loss": 18.1657, "step": 3089 }, { "epoch": 0.056482717019759814, "grad_norm": 6.237795543665309, "learning_rate": 9.981645669785624e-06, "loss": 17.1925, "step": 3090 }, { "epoch": 0.056500996216206335, "grad_norm": 6.925673048313482, "learning_rate": 9.981620320917264e-06, "loss": 17.7825, "step": 3091 }, { "epoch": 0.05651927541265286, "grad_norm": 7.10750480705101, "learning_rate": 9.981594954588759e-06, "loss": 17.7329, "step": 3092 }, { "epoch": 0.056537554609099386, "grad_norm": 7.388764822206102, "learning_rate": 9.981569570800194e-06, "loss": 17.9458, "step": 3093 }, { "epoch": 0.05655583380554591, "grad_norm": 7.695452348366865, "learning_rate": 9.98154416955166e-06, "loss": 17.8532, "step": 3094 }, { "epoch": 0.05657411300199243, "grad_norm": 7.685635568854148, "learning_rate": 9.981518750843247e-06, "loss": 17.948, "step": 3095 }, { "epoch": 0.05659239219843896, "grad_norm": 8.262705401974944, "learning_rate": 9.981493314675044e-06, "loss": 18.4305, "step": 3096 }, { "epoch": 0.05661067139488548, "grad_norm": 8.344981147862454, "learning_rate": 9.981467861047137e-06, "loss": 18.1224, "step": 3097 }, { "epoch": 0.05662895059133201, "grad_norm": 7.611113823112375, "learning_rate": 9.981442389959619e-06, "loss": 17.7372, "step": 3098 }, { "epoch": 0.05664722978777853, "grad_norm": 8.82550241492585, "learning_rate": 9.981416901412577e-06, "loss": 17.7954, "step": 3099 }, { "epoch": 0.05666550898422505, "grad_norm": 6.426091669920725, "learning_rate": 9.9813913954061e-06, "loss": 17.4338, "step": 3100 }, { "epoch": 0.05668378818067158, "grad_norm": 7.6781511310715445, "learning_rate": 9.981365871940281e-06, "loss": 17.8858, "step": 3101 }, { "epoch": 0.0567020673771181, "grad_norm": 8.51942401916345, "learning_rate": 9.981340331015205e-06, "loss": 18.1104, "step": 3102 }, { "epoch": 0.056720346573564624, "grad_norm": 7.303153742323697, "learning_rate": 9.981314772630963e-06, "loss": 17.8026, "step": 3103 }, { "epoch": 0.05673862577001115, "grad_norm": 7.70273749115573, "learning_rate": 9.981289196787646e-06, "loss": 17.8407, "step": 3104 }, { "epoch": 0.056756904966457675, "grad_norm": 7.384810357472592, "learning_rate": 9.981263603485343e-06, "loss": 17.916, "step": 3105 }, { "epoch": 0.056775184162904196, "grad_norm": 6.9427720875237915, "learning_rate": 9.981237992724142e-06, "loss": 17.5603, "step": 3106 }, { "epoch": 0.056793463359350725, "grad_norm": 7.586135612857699, "learning_rate": 9.981212364504135e-06, "loss": 17.96, "step": 3107 }, { "epoch": 0.05681174255579725, "grad_norm": 6.941673895097665, "learning_rate": 9.98118671882541e-06, "loss": 17.7614, "step": 3108 }, { "epoch": 0.05683002175224377, "grad_norm": 7.669828434678138, "learning_rate": 9.98116105568806e-06, "loss": 18.0786, "step": 3109 }, { "epoch": 0.0568483009486903, "grad_norm": 8.214138452932188, "learning_rate": 9.98113537509217e-06, "loss": 18.5006, "step": 3110 }, { "epoch": 0.05686658014513682, "grad_norm": 8.769302543947646, "learning_rate": 9.981109677037834e-06, "loss": 18.7026, "step": 3111 }, { "epoch": 0.05688485934158334, "grad_norm": 6.736288333953572, "learning_rate": 9.981083961525142e-06, "loss": 17.7235, "step": 3112 }, { "epoch": 0.05690313853802987, "grad_norm": 7.525238302445102, "learning_rate": 9.981058228554182e-06, "loss": 17.7236, "step": 3113 }, { "epoch": 0.05692141773447639, "grad_norm": 8.304692348974578, "learning_rate": 9.981032478125044e-06, "loss": 18.3424, "step": 3114 }, { "epoch": 0.05693969693092292, "grad_norm": 7.416249290681871, "learning_rate": 9.981006710237822e-06, "loss": 17.8649, "step": 3115 }, { "epoch": 0.05695797612736944, "grad_norm": 7.713644412755266, "learning_rate": 9.9809809248926e-06, "loss": 18.1338, "step": 3116 }, { "epoch": 0.056976255323815964, "grad_norm": 8.090571385668342, "learning_rate": 9.980955122089476e-06, "loss": 17.7447, "step": 3117 }, { "epoch": 0.05699453452026249, "grad_norm": 6.579862160274033, "learning_rate": 9.980929301828533e-06, "loss": 17.3605, "step": 3118 }, { "epoch": 0.057012813716709014, "grad_norm": 6.459900712847999, "learning_rate": 9.980903464109868e-06, "loss": 17.5769, "step": 3119 }, { "epoch": 0.057031092913155536, "grad_norm": 8.30606420475151, "learning_rate": 9.980877608933566e-06, "loss": 18.0798, "step": 3120 }, { "epoch": 0.057049372109602065, "grad_norm": 7.7536226712048295, "learning_rate": 9.98085173629972e-06, "loss": 18.4216, "step": 3121 }, { "epoch": 0.057067651306048586, "grad_norm": 6.864119640927641, "learning_rate": 9.980825846208424e-06, "loss": 17.6863, "step": 3122 }, { "epoch": 0.05708593050249511, "grad_norm": 8.094039835281302, "learning_rate": 9.980799938659764e-06, "loss": 18.0637, "step": 3123 }, { "epoch": 0.05710420969894164, "grad_norm": 9.114359137978889, "learning_rate": 9.980774013653834e-06, "loss": 18.836, "step": 3124 }, { "epoch": 0.05712248889538816, "grad_norm": 6.761714260561507, "learning_rate": 9.980748071190721e-06, "loss": 17.8163, "step": 3125 }, { "epoch": 0.05714076809183468, "grad_norm": 7.915229109343791, "learning_rate": 9.980722111270518e-06, "loss": 18.1644, "step": 3126 }, { "epoch": 0.05715904728828121, "grad_norm": 7.696451437354202, "learning_rate": 9.980696133893317e-06, "loss": 17.7345, "step": 3127 }, { "epoch": 0.05717732648472773, "grad_norm": 8.703426742138486, "learning_rate": 9.98067013905921e-06, "loss": 18.429, "step": 3128 }, { "epoch": 0.05719560568117425, "grad_norm": 8.743940883294867, "learning_rate": 9.980644126768283e-06, "loss": 18.8959, "step": 3129 }, { "epoch": 0.05721388487762078, "grad_norm": 8.240905753578073, "learning_rate": 9.980618097020634e-06, "loss": 18.2815, "step": 3130 }, { "epoch": 0.0572321640740673, "grad_norm": 7.306110914231443, "learning_rate": 9.98059204981635e-06, "loss": 17.9447, "step": 3131 }, { "epoch": 0.05725044327051383, "grad_norm": 8.189961498568708, "learning_rate": 9.98056598515552e-06, "loss": 18.007, "step": 3132 }, { "epoch": 0.05726872246696035, "grad_norm": 6.871746975009577, "learning_rate": 9.980539903038241e-06, "loss": 17.5626, "step": 3133 }, { "epoch": 0.057287001663406875, "grad_norm": 7.01148624787496, "learning_rate": 9.980513803464602e-06, "loss": 17.7858, "step": 3134 }, { "epoch": 0.057305280859853404, "grad_norm": 7.346460217629107, "learning_rate": 9.980487686434694e-06, "loss": 17.4628, "step": 3135 }, { "epoch": 0.057323560056299926, "grad_norm": 7.5282863277135865, "learning_rate": 9.980461551948609e-06, "loss": 18.0323, "step": 3136 }, { "epoch": 0.05734183925274645, "grad_norm": 8.097465864323018, "learning_rate": 9.980435400006436e-06, "loss": 18.2426, "step": 3137 }, { "epoch": 0.057360118449192976, "grad_norm": 7.932203910254175, "learning_rate": 9.980409230608272e-06, "loss": 17.9906, "step": 3138 }, { "epoch": 0.0573783976456395, "grad_norm": 6.793077500662868, "learning_rate": 9.980383043754206e-06, "loss": 17.822, "step": 3139 }, { "epoch": 0.05739667684208602, "grad_norm": 7.4046220582163, "learning_rate": 9.980356839444328e-06, "loss": 18.009, "step": 3140 }, { "epoch": 0.05741495603853255, "grad_norm": 7.34207226631687, "learning_rate": 9.980330617678731e-06, "loss": 17.8602, "step": 3141 }, { "epoch": 0.05743323523497907, "grad_norm": 8.544845945713869, "learning_rate": 9.980304378457508e-06, "loss": 18.5323, "step": 3142 }, { "epoch": 0.05745151443142559, "grad_norm": 7.644146605020451, "learning_rate": 9.98027812178075e-06, "loss": 17.9759, "step": 3143 }, { "epoch": 0.05746979362787212, "grad_norm": 7.028547691424313, "learning_rate": 9.980251847648551e-06, "loss": 17.8357, "step": 3144 }, { "epoch": 0.05748807282431864, "grad_norm": 7.782382990392942, "learning_rate": 9.980225556061e-06, "loss": 18.0765, "step": 3145 }, { "epoch": 0.057506352020765164, "grad_norm": 7.8146244170079955, "learning_rate": 9.980199247018193e-06, "loss": 18.1027, "step": 3146 }, { "epoch": 0.05752463121721169, "grad_norm": 7.558449598618881, "learning_rate": 9.980172920520216e-06, "loss": 18.4348, "step": 3147 }, { "epoch": 0.057542910413658215, "grad_norm": 8.008248929861436, "learning_rate": 9.980146576567167e-06, "loss": 18.2474, "step": 3148 }, { "epoch": 0.05756118961010474, "grad_norm": 6.320683275914581, "learning_rate": 9.980120215159138e-06, "loss": 17.4358, "step": 3149 }, { "epoch": 0.057579468806551265, "grad_norm": 7.500021336597586, "learning_rate": 9.980093836296216e-06, "loss": 18.0094, "step": 3150 }, { "epoch": 0.05759774800299779, "grad_norm": 6.889861023698829, "learning_rate": 9.9800674399785e-06, "loss": 17.8159, "step": 3151 }, { "epoch": 0.057616027199444315, "grad_norm": 8.129009473177554, "learning_rate": 9.98004102620608e-06, "loss": 18.1608, "step": 3152 }, { "epoch": 0.05763430639589084, "grad_norm": 8.48684312755742, "learning_rate": 9.980014594979047e-06, "loss": 18.0661, "step": 3153 }, { "epoch": 0.05765258559233736, "grad_norm": 7.411642195765292, "learning_rate": 9.979988146297494e-06, "loss": 17.8226, "step": 3154 }, { "epoch": 0.05767086478878389, "grad_norm": 8.940883368045336, "learning_rate": 9.979961680161517e-06, "loss": 18.8211, "step": 3155 }, { "epoch": 0.05768914398523041, "grad_norm": 7.436304395529918, "learning_rate": 9.979935196571207e-06, "loss": 17.8067, "step": 3156 }, { "epoch": 0.05770742318167693, "grad_norm": 7.192700013865126, "learning_rate": 9.979908695526655e-06, "loss": 17.8254, "step": 3157 }, { "epoch": 0.05772570237812346, "grad_norm": 7.848188521253715, "learning_rate": 9.979882177027955e-06, "loss": 17.769, "step": 3158 }, { "epoch": 0.05774398157456998, "grad_norm": 7.25186655023415, "learning_rate": 9.9798556410752e-06, "loss": 17.7228, "step": 3159 }, { "epoch": 0.0577622607710165, "grad_norm": 9.492483714987832, "learning_rate": 9.979829087668483e-06, "loss": 18.5494, "step": 3160 }, { "epoch": 0.05778053996746303, "grad_norm": 8.523093300760328, "learning_rate": 9.979802516807897e-06, "loss": 18.058, "step": 3161 }, { "epoch": 0.057798819163909554, "grad_norm": 9.087293568614113, "learning_rate": 9.979775928493536e-06, "loss": 18.4453, "step": 3162 }, { "epoch": 0.057817098360356076, "grad_norm": 7.340983964773885, "learning_rate": 9.979749322725492e-06, "loss": 17.9543, "step": 3163 }, { "epoch": 0.057835377556802604, "grad_norm": 8.117419409975199, "learning_rate": 9.979722699503859e-06, "loss": 18.4618, "step": 3164 }, { "epoch": 0.057853656753249126, "grad_norm": 8.060086976673936, "learning_rate": 9.97969605882873e-06, "loss": 18.0255, "step": 3165 }, { "epoch": 0.057871935949695655, "grad_norm": 6.691453776028693, "learning_rate": 9.979669400700198e-06, "loss": 17.5696, "step": 3166 }, { "epoch": 0.05789021514614218, "grad_norm": 7.723493050490721, "learning_rate": 9.979642725118358e-06, "loss": 18.1638, "step": 3167 }, { "epoch": 0.0579084943425887, "grad_norm": 7.910569492674526, "learning_rate": 9.979616032083301e-06, "loss": 18.0093, "step": 3168 }, { "epoch": 0.05792677353903523, "grad_norm": 7.362247485355764, "learning_rate": 9.979589321595123e-06, "loss": 17.5434, "step": 3169 }, { "epoch": 0.05794505273548175, "grad_norm": 7.540447907965872, "learning_rate": 9.979562593653916e-06, "loss": 18.1084, "step": 3170 }, { "epoch": 0.05796333193192827, "grad_norm": 7.413722488365144, "learning_rate": 9.979535848259775e-06, "loss": 17.7816, "step": 3171 }, { "epoch": 0.0579816111283748, "grad_norm": 8.349769595748214, "learning_rate": 9.979509085412793e-06, "loss": 18.3193, "step": 3172 }, { "epoch": 0.05799989032482132, "grad_norm": 7.2552155432041365, "learning_rate": 9.979482305113062e-06, "loss": 17.6679, "step": 3173 }, { "epoch": 0.05801816952126784, "grad_norm": 9.039718750437268, "learning_rate": 9.979455507360679e-06, "loss": 18.6886, "step": 3174 }, { "epoch": 0.05803644871771437, "grad_norm": 6.273050204214056, "learning_rate": 9.979428692155737e-06, "loss": 17.3761, "step": 3175 }, { "epoch": 0.05805472791416089, "grad_norm": 6.715594056949398, "learning_rate": 9.979401859498327e-06, "loss": 17.3928, "step": 3176 }, { "epoch": 0.058073007110607415, "grad_norm": 6.650037070977743, "learning_rate": 9.979375009388548e-06, "loss": 17.7325, "step": 3177 }, { "epoch": 0.058091286307053944, "grad_norm": 6.673201876651628, "learning_rate": 9.979348141826491e-06, "loss": 17.5246, "step": 3178 }, { "epoch": 0.058109565503500465, "grad_norm": 7.527923065096524, "learning_rate": 9.979321256812252e-06, "loss": 18.2962, "step": 3179 }, { "epoch": 0.05812784469994699, "grad_norm": 9.008414939763789, "learning_rate": 9.979294354345923e-06, "loss": 18.6181, "step": 3180 }, { "epoch": 0.058146123896393516, "grad_norm": 7.737827894858357, "learning_rate": 9.979267434427599e-06, "loss": 17.8865, "step": 3181 }, { "epoch": 0.05816440309284004, "grad_norm": 7.435350935716762, "learning_rate": 9.979240497057374e-06, "loss": 18.1134, "step": 3182 }, { "epoch": 0.058182682289286566, "grad_norm": 6.392868758180979, "learning_rate": 9.979213542235346e-06, "loss": 17.4657, "step": 3183 }, { "epoch": 0.05820096148573309, "grad_norm": 8.061051245157545, "learning_rate": 9.979186569961603e-06, "loss": 18.4981, "step": 3184 }, { "epoch": 0.05821924068217961, "grad_norm": 7.574884996174679, "learning_rate": 9.979159580236246e-06, "loss": 17.8623, "step": 3185 }, { "epoch": 0.05823751987862614, "grad_norm": 6.972585575102587, "learning_rate": 9.979132573059366e-06, "loss": 17.7043, "step": 3186 }, { "epoch": 0.05825579907507266, "grad_norm": 8.01277178504557, "learning_rate": 9.979105548431058e-06, "loss": 18.2272, "step": 3187 }, { "epoch": 0.05827407827151918, "grad_norm": 6.883657298753507, "learning_rate": 9.979078506351418e-06, "loss": 17.5574, "step": 3188 }, { "epoch": 0.05829235746796571, "grad_norm": 8.252378597440396, "learning_rate": 9.979051446820539e-06, "loss": 18.0717, "step": 3189 }, { "epoch": 0.05831063666441223, "grad_norm": 9.18754927192144, "learning_rate": 9.979024369838516e-06, "loss": 18.5159, "step": 3190 }, { "epoch": 0.058328915860858754, "grad_norm": 7.617789065932563, "learning_rate": 9.978997275405447e-06, "loss": 18.0219, "step": 3191 }, { "epoch": 0.05834719505730528, "grad_norm": 5.51650755113365, "learning_rate": 9.978970163521422e-06, "loss": 17.0512, "step": 3192 }, { "epoch": 0.058365474253751805, "grad_norm": 7.808301822700816, "learning_rate": 9.978943034186539e-06, "loss": 17.8967, "step": 3193 }, { "epoch": 0.05838375345019833, "grad_norm": 8.157376624839548, "learning_rate": 9.978915887400894e-06, "loss": 18.2762, "step": 3194 }, { "epoch": 0.058402032646644855, "grad_norm": 7.530590281296273, "learning_rate": 9.978888723164581e-06, "loss": 17.754, "step": 3195 }, { "epoch": 0.05842031184309138, "grad_norm": 8.107889478601699, "learning_rate": 9.978861541477694e-06, "loss": 18.3321, "step": 3196 }, { "epoch": 0.0584385910395379, "grad_norm": 7.672274118425343, "learning_rate": 9.978834342340329e-06, "loss": 17.8744, "step": 3197 }, { "epoch": 0.05845687023598443, "grad_norm": 7.50720109128021, "learning_rate": 9.978807125752582e-06, "loss": 18.3519, "step": 3198 }, { "epoch": 0.05847514943243095, "grad_norm": 8.526593217492945, "learning_rate": 9.97877989171455e-06, "loss": 18.2489, "step": 3199 }, { "epoch": 0.05849342862887748, "grad_norm": 9.441229667642398, "learning_rate": 9.978752640226325e-06, "loss": 19.2197, "step": 3200 }, { "epoch": 0.058511707825324, "grad_norm": 8.319443367985162, "learning_rate": 9.978725371288004e-06, "loss": 18.3359, "step": 3201 }, { "epoch": 0.05852998702177052, "grad_norm": 8.011564051634045, "learning_rate": 9.978698084899682e-06, "loss": 18.2131, "step": 3202 }, { "epoch": 0.05854826621821705, "grad_norm": 7.764131198002185, "learning_rate": 9.978670781061457e-06, "loss": 18.3141, "step": 3203 }, { "epoch": 0.05856654541466357, "grad_norm": 7.6742758845209735, "learning_rate": 9.97864345977342e-06, "loss": 18.1446, "step": 3204 }, { "epoch": 0.058584824611110094, "grad_norm": 7.481482628458801, "learning_rate": 9.978616121035672e-06, "loss": 18.2536, "step": 3205 }, { "epoch": 0.05860310380755662, "grad_norm": 8.053813882400595, "learning_rate": 9.978588764848307e-06, "loss": 17.7258, "step": 3206 }, { "epoch": 0.058621383004003144, "grad_norm": 9.181886304858015, "learning_rate": 9.97856139121142e-06, "loss": 18.8872, "step": 3207 }, { "epoch": 0.058639662200449666, "grad_norm": 6.952728968364468, "learning_rate": 9.978534000125106e-06, "loss": 17.8382, "step": 3208 }, { "epoch": 0.058657941396896195, "grad_norm": 6.639926778610925, "learning_rate": 9.978506591589463e-06, "loss": 17.7191, "step": 3209 }, { "epoch": 0.058676220593342716, "grad_norm": 7.467723950323885, "learning_rate": 9.978479165604586e-06, "loss": 18.1541, "step": 3210 }, { "epoch": 0.05869449978978924, "grad_norm": 8.352101402192078, "learning_rate": 9.978451722170572e-06, "loss": 17.8843, "step": 3211 }, { "epoch": 0.05871277898623577, "grad_norm": 6.614723848081901, "learning_rate": 9.978424261287518e-06, "loss": 17.4353, "step": 3212 }, { "epoch": 0.05873105818268229, "grad_norm": 9.070620645938524, "learning_rate": 9.978396782955518e-06, "loss": 19.1536, "step": 3213 }, { "epoch": 0.05874933737912881, "grad_norm": 8.718314512695132, "learning_rate": 9.978369287174668e-06, "loss": 18.5638, "step": 3214 }, { "epoch": 0.05876761657557534, "grad_norm": 6.716483752096194, "learning_rate": 9.978341773945067e-06, "loss": 17.8021, "step": 3215 }, { "epoch": 0.05878589577202186, "grad_norm": 8.165609762942184, "learning_rate": 9.97831424326681e-06, "loss": 18.0288, "step": 3216 }, { "epoch": 0.05880417496846839, "grad_norm": 6.437159544469477, "learning_rate": 9.978286695139993e-06, "loss": 17.4915, "step": 3217 }, { "epoch": 0.05882245416491491, "grad_norm": 7.366466706580026, "learning_rate": 9.978259129564713e-06, "loss": 17.7899, "step": 3218 }, { "epoch": 0.05884073336136143, "grad_norm": 9.018297973451752, "learning_rate": 9.978231546541069e-06, "loss": 18.2572, "step": 3219 }, { "epoch": 0.05885901255780796, "grad_norm": 8.295663361723875, "learning_rate": 9.978203946069154e-06, "loss": 18.2259, "step": 3220 }, { "epoch": 0.058877291754254484, "grad_norm": 6.665727972785896, "learning_rate": 9.978176328149064e-06, "loss": 17.5892, "step": 3221 }, { "epoch": 0.058895570950701005, "grad_norm": 9.108174279374897, "learning_rate": 9.9781486927809e-06, "loss": 18.667, "step": 3222 }, { "epoch": 0.058913850147147534, "grad_norm": 7.2191477273365425, "learning_rate": 9.978121039964757e-06, "loss": 17.9186, "step": 3223 }, { "epoch": 0.058932129343594056, "grad_norm": 6.952581207518228, "learning_rate": 9.978093369700733e-06, "loss": 18.0369, "step": 3224 }, { "epoch": 0.05895040854004058, "grad_norm": 9.454293435602219, "learning_rate": 9.978065681988921e-06, "loss": 19.1317, "step": 3225 }, { "epoch": 0.058968687736487106, "grad_norm": 7.661129555256189, "learning_rate": 9.978037976829423e-06, "loss": 18.2276, "step": 3226 }, { "epoch": 0.05898696693293363, "grad_norm": 7.190124056734599, "learning_rate": 9.978010254222332e-06, "loss": 17.7638, "step": 3227 }, { "epoch": 0.05900524612938015, "grad_norm": 6.885324932029891, "learning_rate": 9.977982514167748e-06, "loss": 17.6156, "step": 3228 }, { "epoch": 0.05902352532582668, "grad_norm": 8.836508839201812, "learning_rate": 9.977954756665766e-06, "loss": 18.5187, "step": 3229 }, { "epoch": 0.0590418045222732, "grad_norm": 7.042024853413884, "learning_rate": 9.977926981716486e-06, "loss": 17.7895, "step": 3230 }, { "epoch": 0.05906008371871972, "grad_norm": 7.222072324753219, "learning_rate": 9.977899189320002e-06, "loss": 18.0641, "step": 3231 }, { "epoch": 0.05907836291516625, "grad_norm": 7.789156150808883, "learning_rate": 9.977871379476416e-06, "loss": 18.1854, "step": 3232 }, { "epoch": 0.05909664211161277, "grad_norm": 7.019874445437046, "learning_rate": 9.977843552185822e-06, "loss": 17.5921, "step": 3233 }, { "epoch": 0.0591149213080593, "grad_norm": 6.961227256593004, "learning_rate": 9.977815707448317e-06, "loss": 17.638, "step": 3234 }, { "epoch": 0.05913320050450582, "grad_norm": 7.404831439084697, "learning_rate": 9.977787845264001e-06, "loss": 18.1359, "step": 3235 }, { "epoch": 0.059151479700952345, "grad_norm": 7.6081190895585085, "learning_rate": 9.97775996563297e-06, "loss": 17.9784, "step": 3236 }, { "epoch": 0.05916975889739887, "grad_norm": 8.189798289907229, "learning_rate": 9.977732068555323e-06, "loss": 18.1947, "step": 3237 }, { "epoch": 0.059188038093845395, "grad_norm": 9.603313968659359, "learning_rate": 9.977704154031156e-06, "loss": 18.4913, "step": 3238 }, { "epoch": 0.05920631729029192, "grad_norm": 7.24942481827696, "learning_rate": 9.977676222060568e-06, "loss": 17.6742, "step": 3239 }, { "epoch": 0.059224596486738446, "grad_norm": 7.837441304765053, "learning_rate": 9.977648272643658e-06, "loss": 18.347, "step": 3240 }, { "epoch": 0.05924287568318497, "grad_norm": 7.488596814459485, "learning_rate": 9.977620305780522e-06, "loss": 17.9552, "step": 3241 }, { "epoch": 0.05926115487963149, "grad_norm": 25.05949073017427, "learning_rate": 9.977592321471259e-06, "loss": 18.189, "step": 3242 }, { "epoch": 0.05927943407607802, "grad_norm": 8.342189405871983, "learning_rate": 9.977564319715966e-06, "loss": 17.5618, "step": 3243 }, { "epoch": 0.05929771327252454, "grad_norm": 9.221504010474346, "learning_rate": 9.977536300514742e-06, "loss": 17.8729, "step": 3244 }, { "epoch": 0.05931599246897106, "grad_norm": 10.218891821200936, "learning_rate": 9.977508263867688e-06, "loss": 18.9667, "step": 3245 }, { "epoch": 0.05933427166541759, "grad_norm": 8.044011082674539, "learning_rate": 9.977480209774897e-06, "loss": 18.1998, "step": 3246 }, { "epoch": 0.05935255086186411, "grad_norm": 8.786931638511275, "learning_rate": 9.977452138236469e-06, "loss": 17.7495, "step": 3247 }, { "epoch": 0.059370830058310634, "grad_norm": 7.66967034121712, "learning_rate": 9.977424049252504e-06, "loss": 17.4421, "step": 3248 }, { "epoch": 0.05938910925475716, "grad_norm": 7.727496643488349, "learning_rate": 9.9773959428231e-06, "loss": 18.2082, "step": 3249 }, { "epoch": 0.059407388451203684, "grad_norm": 8.994041901925366, "learning_rate": 9.977367818948355e-06, "loss": 18.2771, "step": 3250 }, { "epoch": 0.05942566764765021, "grad_norm": 8.026884494537635, "learning_rate": 9.977339677628369e-06, "loss": 17.595, "step": 3251 }, { "epoch": 0.059443946844096734, "grad_norm": 7.570263993485957, "learning_rate": 9.977311518863237e-06, "loss": 17.6949, "step": 3252 }, { "epoch": 0.059462226040543256, "grad_norm": 7.846573858742153, "learning_rate": 9.97728334265306e-06, "loss": 18.1315, "step": 3253 }, { "epoch": 0.059480505236989785, "grad_norm": 9.346891470386613, "learning_rate": 9.97725514899794e-06, "loss": 18.8245, "step": 3254 }, { "epoch": 0.05949878443343631, "grad_norm": 8.173897293518895, "learning_rate": 9.97722693789797e-06, "loss": 17.6173, "step": 3255 }, { "epoch": 0.05951706362988283, "grad_norm": 6.700094234008094, "learning_rate": 9.97719870935325e-06, "loss": 17.3285, "step": 3256 }, { "epoch": 0.05953534282632936, "grad_norm": 7.4321584915430785, "learning_rate": 9.977170463363883e-06, "loss": 17.7529, "step": 3257 }, { "epoch": 0.05955362202277588, "grad_norm": 8.382330015612283, "learning_rate": 9.977142199929965e-06, "loss": 17.927, "step": 3258 }, { "epoch": 0.0595719012192224, "grad_norm": 7.894075544574096, "learning_rate": 9.977113919051595e-06, "loss": 17.815, "step": 3259 }, { "epoch": 0.05959018041566893, "grad_norm": 8.098918306058742, "learning_rate": 9.977085620728875e-06, "loss": 18.2385, "step": 3260 }, { "epoch": 0.05960845961211545, "grad_norm": 8.068916376934771, "learning_rate": 9.977057304961899e-06, "loss": 17.9026, "step": 3261 }, { "epoch": 0.05962673880856197, "grad_norm": 8.10641133511313, "learning_rate": 9.977028971750769e-06, "loss": 18.2146, "step": 3262 }, { "epoch": 0.0596450180050085, "grad_norm": 6.512236878155436, "learning_rate": 9.977000621095585e-06, "loss": 17.4373, "step": 3263 }, { "epoch": 0.05966329720145502, "grad_norm": 8.71608684189703, "learning_rate": 9.976972252996447e-06, "loss": 18.4464, "step": 3264 }, { "epoch": 0.059681576397901545, "grad_norm": 7.281434667691815, "learning_rate": 9.976943867453452e-06, "loss": 17.6039, "step": 3265 }, { "epoch": 0.059699855594348074, "grad_norm": 8.526349343559344, "learning_rate": 9.9769154644667e-06, "loss": 18.5155, "step": 3266 }, { "epoch": 0.059718134790794596, "grad_norm": 7.266321725012322, "learning_rate": 9.976887044036291e-06, "loss": 17.7049, "step": 3267 }, { "epoch": 0.059736413987241124, "grad_norm": 8.96036466200305, "learning_rate": 9.976858606162326e-06, "loss": 18.1321, "step": 3268 }, { "epoch": 0.059754693183687646, "grad_norm": 9.878808320607032, "learning_rate": 9.976830150844902e-06, "loss": 18.5663, "step": 3269 }, { "epoch": 0.05977297238013417, "grad_norm": 8.15912469902459, "learning_rate": 9.976801678084123e-06, "loss": 17.9054, "step": 3270 }, { "epoch": 0.059791251576580697, "grad_norm": 6.4488928278431255, "learning_rate": 9.976773187880083e-06, "loss": 17.4122, "step": 3271 }, { "epoch": 0.05980953077302722, "grad_norm": 8.785649582603085, "learning_rate": 9.976744680232886e-06, "loss": 18.1126, "step": 3272 }, { "epoch": 0.05982780996947374, "grad_norm": 8.9666925267377, "learning_rate": 9.976716155142632e-06, "loss": 18.475, "step": 3273 }, { "epoch": 0.05984608916592027, "grad_norm": 8.085738504039458, "learning_rate": 9.976687612609417e-06, "loss": 18.0753, "step": 3274 }, { "epoch": 0.05986436836236679, "grad_norm": 8.834988133785304, "learning_rate": 9.976659052633347e-06, "loss": 18.6388, "step": 3275 }, { "epoch": 0.05988264755881331, "grad_norm": 7.149216722572933, "learning_rate": 9.976630475214515e-06, "loss": 17.5655, "step": 3276 }, { "epoch": 0.05990092675525984, "grad_norm": 7.248163919124378, "learning_rate": 9.976601880353028e-06, "loss": 17.8018, "step": 3277 }, { "epoch": 0.05991920595170636, "grad_norm": 7.903396617238845, "learning_rate": 9.976573268048983e-06, "loss": 18.1317, "step": 3278 }, { "epoch": 0.059937485148152884, "grad_norm": 7.2367672796718105, "learning_rate": 9.97654463830248e-06, "loss": 17.8175, "step": 3279 }, { "epoch": 0.05995576434459941, "grad_norm": 7.61388358163043, "learning_rate": 9.97651599111362e-06, "loss": 17.9395, "step": 3280 }, { "epoch": 0.059974043541045935, "grad_norm": 6.776332874611314, "learning_rate": 9.976487326482503e-06, "loss": 17.7383, "step": 3281 }, { "epoch": 0.05999232273749246, "grad_norm": 8.476125130232775, "learning_rate": 9.976458644409231e-06, "loss": 18.3575, "step": 3282 }, { "epoch": 0.060010601933938985, "grad_norm": 7.487495575125886, "learning_rate": 9.976429944893902e-06, "loss": 17.8411, "step": 3283 }, { "epoch": 0.06002888113038551, "grad_norm": 7.629057785867848, "learning_rate": 9.976401227936616e-06, "loss": 17.9797, "step": 3284 }, { "epoch": 0.060047160326832036, "grad_norm": 7.9242789683691015, "learning_rate": 9.976372493537479e-06, "loss": 17.9919, "step": 3285 }, { "epoch": 0.06006543952327856, "grad_norm": 7.6290860103886695, "learning_rate": 9.976343741696586e-06, "loss": 17.9728, "step": 3286 }, { "epoch": 0.06008371871972508, "grad_norm": 7.552164863018478, "learning_rate": 9.97631497241404e-06, "loss": 17.7703, "step": 3287 }, { "epoch": 0.06010199791617161, "grad_norm": 7.17350157489045, "learning_rate": 9.976286185689944e-06, "loss": 17.8132, "step": 3288 }, { "epoch": 0.06012027711261813, "grad_norm": 7.168377024849365, "learning_rate": 9.976257381524396e-06, "loss": 17.771, "step": 3289 }, { "epoch": 0.06013855630906465, "grad_norm": 9.442343423631371, "learning_rate": 9.976228559917497e-06, "loss": 18.45, "step": 3290 }, { "epoch": 0.06015683550551118, "grad_norm": 7.685395625931217, "learning_rate": 9.976199720869348e-06, "loss": 18.0959, "step": 3291 }, { "epoch": 0.0601751147019577, "grad_norm": 7.789790910507743, "learning_rate": 9.976170864380052e-06, "loss": 18.2507, "step": 3292 }, { "epoch": 0.060193393898404224, "grad_norm": 6.91393548772296, "learning_rate": 9.976141990449708e-06, "loss": 17.4989, "step": 3293 }, { "epoch": 0.06021167309485075, "grad_norm": 7.900983241934687, "learning_rate": 9.97611309907842e-06, "loss": 18.2346, "step": 3294 }, { "epoch": 0.060229952291297274, "grad_norm": 10.003760275284977, "learning_rate": 9.976084190266286e-06, "loss": 19.1263, "step": 3295 }, { "epoch": 0.060248231487743796, "grad_norm": 8.209602792150497, "learning_rate": 9.976055264013408e-06, "loss": 18.0399, "step": 3296 }, { "epoch": 0.060266510684190325, "grad_norm": 8.706734941265879, "learning_rate": 9.97602632031989e-06, "loss": 18.4225, "step": 3297 }, { "epoch": 0.060284789880636847, "grad_norm": 8.016640715379337, "learning_rate": 9.97599735918583e-06, "loss": 18.3653, "step": 3298 }, { "epoch": 0.06030306907708337, "grad_norm": 6.334820181175187, "learning_rate": 9.975968380611332e-06, "loss": 17.2041, "step": 3299 }, { "epoch": 0.0603213482735299, "grad_norm": 7.578792617036335, "learning_rate": 9.975939384596496e-06, "loss": 17.9529, "step": 3300 }, { "epoch": 0.06033962746997642, "grad_norm": 8.093553982308935, "learning_rate": 9.975910371141424e-06, "loss": 18.4033, "step": 3301 }, { "epoch": 0.06035790666642295, "grad_norm": 8.083567144807205, "learning_rate": 9.975881340246218e-06, "loss": 18.0603, "step": 3302 }, { "epoch": 0.06037618586286947, "grad_norm": 9.014212016579211, "learning_rate": 9.975852291910982e-06, "loss": 17.8603, "step": 3303 }, { "epoch": 0.06039446505931599, "grad_norm": 7.197672327541092, "learning_rate": 9.975823226135813e-06, "loss": 17.8271, "step": 3304 }, { "epoch": 0.06041274425576252, "grad_norm": 6.384490700127685, "learning_rate": 9.975794142920815e-06, "loss": 17.4841, "step": 3305 }, { "epoch": 0.06043102345220904, "grad_norm": 7.368130733758599, "learning_rate": 9.975765042266091e-06, "loss": 18.0889, "step": 3306 }, { "epoch": 0.06044930264865556, "grad_norm": 6.495304240788988, "learning_rate": 9.975735924171744e-06, "loss": 17.2142, "step": 3307 }, { "epoch": 0.06046758184510209, "grad_norm": 6.956934837553296, "learning_rate": 9.97570678863787e-06, "loss": 17.6698, "step": 3308 }, { "epoch": 0.060485861041548614, "grad_norm": 8.587591323241254, "learning_rate": 9.97567763566458e-06, "loss": 18.2999, "step": 3309 }, { "epoch": 0.060504140237995135, "grad_norm": 8.128712306915489, "learning_rate": 9.97564846525197e-06, "loss": 18.3157, "step": 3310 }, { "epoch": 0.060522419434441664, "grad_norm": 6.778574663367813, "learning_rate": 9.975619277400144e-06, "loss": 17.5754, "step": 3311 }, { "epoch": 0.060540698630888186, "grad_norm": 7.144848975595105, "learning_rate": 9.975590072109205e-06, "loss": 17.6057, "step": 3312 }, { "epoch": 0.06055897782733471, "grad_norm": 6.863204769693211, "learning_rate": 9.975560849379253e-06, "loss": 17.6446, "step": 3313 }, { "epoch": 0.060577257023781236, "grad_norm": 6.954857254229112, "learning_rate": 9.975531609210393e-06, "loss": 17.3515, "step": 3314 }, { "epoch": 0.06059553622022776, "grad_norm": 7.387071064673921, "learning_rate": 9.975502351602726e-06, "loss": 17.9307, "step": 3315 }, { "epoch": 0.06061381541667428, "grad_norm": 7.204790219760822, "learning_rate": 9.975473076556355e-06, "loss": 17.552, "step": 3316 }, { "epoch": 0.06063209461312081, "grad_norm": 7.034817915669464, "learning_rate": 9.975443784071383e-06, "loss": 17.6357, "step": 3317 }, { "epoch": 0.06065037380956733, "grad_norm": 7.178123674791742, "learning_rate": 9.975414474147911e-06, "loss": 17.7269, "step": 3318 }, { "epoch": 0.06066865300601386, "grad_norm": 7.519903671620943, "learning_rate": 9.975385146786044e-06, "loss": 17.9158, "step": 3319 }, { "epoch": 0.06068693220246038, "grad_norm": 7.304931624101692, "learning_rate": 9.975355801985885e-06, "loss": 17.7184, "step": 3320 }, { "epoch": 0.0607052113989069, "grad_norm": 6.743292187568647, "learning_rate": 9.975326439747534e-06, "loss": 17.5652, "step": 3321 }, { "epoch": 0.06072349059535343, "grad_norm": 9.112577517205871, "learning_rate": 9.975297060071097e-06, "loss": 18.6824, "step": 3322 }, { "epoch": 0.06074176979179995, "grad_norm": 7.689219605823097, "learning_rate": 9.975267662956674e-06, "loss": 17.776, "step": 3323 }, { "epoch": 0.060760048988246475, "grad_norm": 8.972564384176065, "learning_rate": 9.97523824840437e-06, "loss": 18.6782, "step": 3324 }, { "epoch": 0.060778328184693, "grad_norm": 7.75198143338471, "learning_rate": 9.975208816414288e-06, "loss": 17.868, "step": 3325 }, { "epoch": 0.060796607381139525, "grad_norm": 6.605808021525001, "learning_rate": 9.97517936698653e-06, "loss": 17.6094, "step": 3326 }, { "epoch": 0.06081488657758605, "grad_norm": 6.495524699742615, "learning_rate": 9.975149900121201e-06, "loss": 17.3402, "step": 3327 }, { "epoch": 0.060833165774032576, "grad_norm": 8.519181664837557, "learning_rate": 9.975120415818403e-06, "loss": 18.2129, "step": 3328 }, { "epoch": 0.0608514449704791, "grad_norm": 6.830774350900405, "learning_rate": 9.97509091407824e-06, "loss": 17.6935, "step": 3329 }, { "epoch": 0.06086972416692562, "grad_norm": 6.619043740946671, "learning_rate": 9.975061394900814e-06, "loss": 17.4408, "step": 3330 }, { "epoch": 0.06088800336337215, "grad_norm": 7.538531552487765, "learning_rate": 9.97503185828623e-06, "loss": 18.0834, "step": 3331 }, { "epoch": 0.06090628255981867, "grad_norm": 8.20921101532402, "learning_rate": 9.975002304234593e-06, "loss": 18.2413, "step": 3332 }, { "epoch": 0.06092456175626519, "grad_norm": 9.01460801019503, "learning_rate": 9.974972732746002e-06, "loss": 18.0666, "step": 3333 }, { "epoch": 0.06094284095271172, "grad_norm": 7.724620821664519, "learning_rate": 9.974943143820564e-06, "loss": 17.9919, "step": 3334 }, { "epoch": 0.06096112014915824, "grad_norm": 7.1984340650835215, "learning_rate": 9.974913537458384e-06, "loss": 17.9003, "step": 3335 }, { "epoch": 0.06097939934560477, "grad_norm": 6.1580223199414, "learning_rate": 9.974883913659561e-06, "loss": 17.3667, "step": 3336 }, { "epoch": 0.06099767854205129, "grad_norm": 7.75991150855965, "learning_rate": 9.974854272424203e-06, "loss": 17.8572, "step": 3337 }, { "epoch": 0.061015957738497814, "grad_norm": 7.0164285715245605, "learning_rate": 9.974824613752412e-06, "loss": 17.5536, "step": 3338 }, { "epoch": 0.06103423693494434, "grad_norm": 7.859173707418357, "learning_rate": 9.974794937644292e-06, "loss": 17.8944, "step": 3339 }, { "epoch": 0.061052516131390865, "grad_norm": 7.5441465924666735, "learning_rate": 9.97476524409995e-06, "loss": 17.7754, "step": 3340 }, { "epoch": 0.061070795327837386, "grad_norm": 6.853068682320698, "learning_rate": 9.974735533119485e-06, "loss": 17.7593, "step": 3341 }, { "epoch": 0.061089074524283915, "grad_norm": 7.482705010266999, "learning_rate": 9.974705804703002e-06, "loss": 17.8316, "step": 3342 }, { "epoch": 0.06110735372073044, "grad_norm": 8.469190246912817, "learning_rate": 9.97467605885061e-06, "loss": 18.1225, "step": 3343 }, { "epoch": 0.06112563291717696, "grad_norm": 6.541346402218264, "learning_rate": 9.97464629556241e-06, "loss": 17.1778, "step": 3344 }, { "epoch": 0.06114391211362349, "grad_norm": 8.319928154406021, "learning_rate": 9.974616514838504e-06, "loss": 18.5495, "step": 3345 }, { "epoch": 0.06116219131007001, "grad_norm": 8.379862459573424, "learning_rate": 9.974586716679e-06, "loss": 18.2181, "step": 3346 }, { "epoch": 0.06118047050651653, "grad_norm": 8.57272529587977, "learning_rate": 9.974556901084002e-06, "loss": 18.0759, "step": 3347 }, { "epoch": 0.06119874970296306, "grad_norm": 9.490933256616948, "learning_rate": 9.974527068053613e-06, "loss": 18.5071, "step": 3348 }, { "epoch": 0.06121702889940958, "grad_norm": 8.791459883568644, "learning_rate": 9.97449721758794e-06, "loss": 18.239, "step": 3349 }, { "epoch": 0.0612353080958561, "grad_norm": 7.428366883134674, "learning_rate": 9.974467349687082e-06, "loss": 17.9357, "step": 3350 }, { "epoch": 0.06125358729230263, "grad_norm": 9.111407809345186, "learning_rate": 9.974437464351151e-06, "loss": 18.6755, "step": 3351 }, { "epoch": 0.061271866488749153, "grad_norm": 8.635498076159992, "learning_rate": 9.974407561580248e-06, "loss": 18.2551, "step": 3352 }, { "epoch": 0.06129014568519568, "grad_norm": 6.650086668964261, "learning_rate": 9.974377641374477e-06, "loss": 17.3573, "step": 3353 }, { "epoch": 0.061308424881642204, "grad_norm": 7.135388716063664, "learning_rate": 9.974347703733945e-06, "loss": 17.7386, "step": 3354 }, { "epoch": 0.061326704078088726, "grad_norm": 7.224218446437377, "learning_rate": 9.974317748658754e-06, "loss": 17.7415, "step": 3355 }, { "epoch": 0.061344983274535254, "grad_norm": 6.7564680913700315, "learning_rate": 9.974287776149013e-06, "loss": 17.8325, "step": 3356 }, { "epoch": 0.061363262470981776, "grad_norm": 6.540021535054071, "learning_rate": 9.974257786204826e-06, "loss": 17.4231, "step": 3357 }, { "epoch": 0.0613815416674283, "grad_norm": 6.815830767117462, "learning_rate": 9.974227778826296e-06, "loss": 17.4986, "step": 3358 }, { "epoch": 0.06139982086387483, "grad_norm": 7.129445299972657, "learning_rate": 9.974197754013527e-06, "loss": 17.8041, "step": 3359 }, { "epoch": 0.06141810006032135, "grad_norm": 7.153791344324305, "learning_rate": 9.974167711766629e-06, "loss": 17.6922, "step": 3360 }, { "epoch": 0.06143637925676787, "grad_norm": 8.150271325666516, "learning_rate": 9.974137652085705e-06, "loss": 18.0894, "step": 3361 }, { "epoch": 0.0614546584532144, "grad_norm": 6.76458176038737, "learning_rate": 9.974107574970858e-06, "loss": 17.9596, "step": 3362 }, { "epoch": 0.06147293764966092, "grad_norm": 6.562435612957236, "learning_rate": 9.974077480422197e-06, "loss": 17.5366, "step": 3363 }, { "epoch": 0.06149121684610744, "grad_norm": 8.310894250512407, "learning_rate": 9.974047368439827e-06, "loss": 17.8685, "step": 3364 }, { "epoch": 0.06150949604255397, "grad_norm": 6.424976980092459, "learning_rate": 9.974017239023851e-06, "loss": 17.3777, "step": 3365 }, { "epoch": 0.06152777523900049, "grad_norm": 9.159875494056582, "learning_rate": 9.973987092174377e-06, "loss": 18.3677, "step": 3366 }, { "epoch": 0.061546054435447015, "grad_norm": 9.207276557072449, "learning_rate": 9.97395692789151e-06, "loss": 19.1496, "step": 3367 }, { "epoch": 0.06156433363189354, "grad_norm": 8.280780761104374, "learning_rate": 9.973926746175354e-06, "loss": 18.1527, "step": 3368 }, { "epoch": 0.061582612828340065, "grad_norm": 9.105685183822006, "learning_rate": 9.973896547026019e-06, "loss": 18.2787, "step": 3369 }, { "epoch": 0.061600892024786594, "grad_norm": 7.568047989769413, "learning_rate": 9.973866330443606e-06, "loss": 17.6217, "step": 3370 }, { "epoch": 0.061619171221233116, "grad_norm": 8.525715853579365, "learning_rate": 9.973836096428224e-06, "loss": 17.9766, "step": 3371 }, { "epoch": 0.06163745041767964, "grad_norm": 8.70039255709809, "learning_rate": 9.973805844979978e-06, "loss": 18.2261, "step": 3372 }, { "epoch": 0.061655729614126166, "grad_norm": 7.227871794954165, "learning_rate": 9.973775576098974e-06, "loss": 17.7765, "step": 3373 }, { "epoch": 0.06167400881057269, "grad_norm": 6.400738094315887, "learning_rate": 9.973745289785318e-06, "loss": 17.3244, "step": 3374 }, { "epoch": 0.06169228800701921, "grad_norm": 5.9325840074765885, "learning_rate": 9.973714986039117e-06, "loss": 17.1561, "step": 3375 }, { "epoch": 0.06171056720346574, "grad_norm": 7.481540854368121, "learning_rate": 9.973684664860477e-06, "loss": 17.8805, "step": 3376 }, { "epoch": 0.06172884639991226, "grad_norm": 8.170644267216476, "learning_rate": 9.973654326249502e-06, "loss": 18.4836, "step": 3377 }, { "epoch": 0.06174712559635878, "grad_norm": 6.232161029270701, "learning_rate": 9.973623970206302e-06, "loss": 17.2242, "step": 3378 }, { "epoch": 0.06176540479280531, "grad_norm": 8.752431819138764, "learning_rate": 9.97359359673098e-06, "loss": 18.2505, "step": 3379 }, { "epoch": 0.06178368398925183, "grad_norm": 8.109599262971056, "learning_rate": 9.973563205823645e-06, "loss": 17.9687, "step": 3380 }, { "epoch": 0.061801963185698354, "grad_norm": 8.134697727474459, "learning_rate": 9.973532797484403e-06, "loss": 18.2258, "step": 3381 }, { "epoch": 0.06182024238214488, "grad_norm": 7.605585006406203, "learning_rate": 9.973502371713359e-06, "loss": 18.0899, "step": 3382 }, { "epoch": 0.061838521578591404, "grad_norm": 7.353451548277089, "learning_rate": 9.973471928510621e-06, "loss": 17.7314, "step": 3383 }, { "epoch": 0.061856800775037926, "grad_norm": 7.641356288460134, "learning_rate": 9.973441467876298e-06, "loss": 17.9315, "step": 3384 }, { "epoch": 0.061875079971484455, "grad_norm": 7.034745775133391, "learning_rate": 9.97341098981049e-06, "loss": 17.6319, "step": 3385 }, { "epoch": 0.06189335916793098, "grad_norm": 8.052019579235315, "learning_rate": 9.973380494313312e-06, "loss": 18.3079, "step": 3386 }, { "epoch": 0.061911638364377505, "grad_norm": 7.919250964595605, "learning_rate": 9.973349981384864e-06, "loss": 18.1278, "step": 3387 }, { "epoch": 0.06192991756082403, "grad_norm": 7.239696372922274, "learning_rate": 9.973319451025256e-06, "loss": 17.9743, "step": 3388 }, { "epoch": 0.06194819675727055, "grad_norm": 6.624433741140478, "learning_rate": 9.973288903234597e-06, "loss": 17.6247, "step": 3389 }, { "epoch": 0.06196647595371708, "grad_norm": 7.931092143699261, "learning_rate": 9.97325833801299e-06, "loss": 18.0357, "step": 3390 }, { "epoch": 0.0619847551501636, "grad_norm": 7.642311280538421, "learning_rate": 9.973227755360547e-06, "loss": 17.9035, "step": 3391 }, { "epoch": 0.06200303434661012, "grad_norm": 7.292890780594306, "learning_rate": 9.973197155277368e-06, "loss": 17.7307, "step": 3392 }, { "epoch": 0.06202131354305665, "grad_norm": 7.668085599362952, "learning_rate": 9.973166537763568e-06, "loss": 17.962, "step": 3393 }, { "epoch": 0.06203959273950317, "grad_norm": 7.360767722070579, "learning_rate": 9.973135902819249e-06, "loss": 17.6829, "step": 3394 }, { "epoch": 0.06205787193594969, "grad_norm": 8.017153441352935, "learning_rate": 9.973105250444522e-06, "loss": 18.1401, "step": 3395 }, { "epoch": 0.06207615113239622, "grad_norm": 7.94897649970752, "learning_rate": 9.97307458063949e-06, "loss": 17.9615, "step": 3396 }, { "epoch": 0.062094430328842744, "grad_norm": 6.8966507827622445, "learning_rate": 9.973043893404264e-06, "loss": 17.6592, "step": 3397 }, { "epoch": 0.062112709525289266, "grad_norm": 7.875780027846666, "learning_rate": 9.97301318873895e-06, "loss": 18.0234, "step": 3398 }, { "epoch": 0.062130988721735794, "grad_norm": 8.625055502829102, "learning_rate": 9.97298246664366e-06, "loss": 18.2048, "step": 3399 }, { "epoch": 0.062149267918182316, "grad_norm": 9.224413111984235, "learning_rate": 9.972951727118493e-06, "loss": 18.4906, "step": 3400 }, { "epoch": 0.06216754711462884, "grad_norm": 8.224259994090788, "learning_rate": 9.972920970163566e-06, "loss": 17.6905, "step": 3401 }, { "epoch": 0.062185826311075366, "grad_norm": 8.59232117911046, "learning_rate": 9.972890195778982e-06, "loss": 18.0538, "step": 3402 }, { "epoch": 0.06220410550752189, "grad_norm": 6.533585167274885, "learning_rate": 9.972859403964848e-06, "loss": 17.3578, "step": 3403 }, { "epoch": 0.06222238470396842, "grad_norm": 8.334520291081128, "learning_rate": 9.972828594721272e-06, "loss": 18.3773, "step": 3404 }, { "epoch": 0.06224066390041494, "grad_norm": 8.596086287415487, "learning_rate": 9.972797768048366e-06, "loss": 18.4815, "step": 3405 }, { "epoch": 0.06225894309686146, "grad_norm": 6.803339182577718, "learning_rate": 9.972766923946233e-06, "loss": 17.5434, "step": 3406 }, { "epoch": 0.06227722229330799, "grad_norm": 8.767755354595359, "learning_rate": 9.972736062414985e-06, "loss": 18.2653, "step": 3407 }, { "epoch": 0.06229550148975451, "grad_norm": 8.634460171921882, "learning_rate": 9.972705183454728e-06, "loss": 17.8145, "step": 3408 }, { "epoch": 0.06231378068620103, "grad_norm": 8.138618267251033, "learning_rate": 9.972674287065572e-06, "loss": 17.9175, "step": 3409 }, { "epoch": 0.06233205988264756, "grad_norm": 8.218118739045496, "learning_rate": 9.972643373247622e-06, "loss": 18.3895, "step": 3410 }, { "epoch": 0.06235033907909408, "grad_norm": 7.523550041610053, "learning_rate": 9.97261244200099e-06, "loss": 17.7614, "step": 3411 }, { "epoch": 0.062368618275540605, "grad_norm": 6.8500248444360405, "learning_rate": 9.972581493325781e-06, "loss": 17.5702, "step": 3412 }, { "epoch": 0.062386897471987134, "grad_norm": 7.924404364025476, "learning_rate": 9.972550527222107e-06, "loss": 18.0084, "step": 3413 }, { "epoch": 0.062405176668433655, "grad_norm": 7.130359191753166, "learning_rate": 9.972519543690076e-06, "loss": 17.6316, "step": 3414 }, { "epoch": 0.06242345586488018, "grad_norm": 6.881607236454959, "learning_rate": 9.972488542729795e-06, "loss": 17.5451, "step": 3415 }, { "epoch": 0.062441735061326706, "grad_norm": 6.049960297311258, "learning_rate": 9.972457524341372e-06, "loss": 17.432, "step": 3416 }, { "epoch": 0.06246001425777323, "grad_norm": 5.9836785326342214, "learning_rate": 9.972426488524916e-06, "loss": 17.3387, "step": 3417 }, { "epoch": 0.06247829345421975, "grad_norm": 7.300580050033733, "learning_rate": 9.972395435280539e-06, "loss": 17.7574, "step": 3418 }, { "epoch": 0.06249657265066628, "grad_norm": 7.969434870318104, "learning_rate": 9.972364364608347e-06, "loss": 18.3107, "step": 3419 }, { "epoch": 0.0625148518471128, "grad_norm": 9.14055506522522, "learning_rate": 9.972333276508449e-06, "loss": 18.5925, "step": 3420 }, { "epoch": 0.06253313104355933, "grad_norm": 8.606727375443928, "learning_rate": 9.972302170980953e-06, "loss": 18.408, "step": 3421 }, { "epoch": 0.06255141024000585, "grad_norm": 7.729437250438963, "learning_rate": 9.97227104802597e-06, "loss": 17.821, "step": 3422 }, { "epoch": 0.06256968943645237, "grad_norm": 7.356468589302877, "learning_rate": 9.97223990764361e-06, "loss": 18.1116, "step": 3423 }, { "epoch": 0.0625879686328989, "grad_norm": 6.923663093282652, "learning_rate": 9.97220874983398e-06, "loss": 17.5378, "step": 3424 }, { "epoch": 0.06260624782934542, "grad_norm": 7.690729163444116, "learning_rate": 9.972177574597188e-06, "loss": 17.9849, "step": 3425 }, { "epoch": 0.06262452702579195, "grad_norm": 7.548743588291021, "learning_rate": 9.972146381933348e-06, "loss": 17.9003, "step": 3426 }, { "epoch": 0.06264280622223847, "grad_norm": 9.100394284286327, "learning_rate": 9.972115171842565e-06, "loss": 18.5092, "step": 3427 }, { "epoch": 0.062661085418685, "grad_norm": 8.163577626288586, "learning_rate": 9.972083944324948e-06, "loss": 18.0888, "step": 3428 }, { "epoch": 0.06267936461513152, "grad_norm": 6.535376515979392, "learning_rate": 9.97205269938061e-06, "loss": 17.5992, "step": 3429 }, { "epoch": 0.06269764381157804, "grad_norm": 8.565121635484441, "learning_rate": 9.972021437009659e-06, "loss": 18.2891, "step": 3430 }, { "epoch": 0.06271592300802457, "grad_norm": 7.981921959868312, "learning_rate": 9.971990157212203e-06, "loss": 17.9579, "step": 3431 }, { "epoch": 0.0627342022044711, "grad_norm": 8.589852885032029, "learning_rate": 9.971958859988356e-06, "loss": 18.361, "step": 3432 }, { "epoch": 0.06275248140091762, "grad_norm": 7.383544479894661, "learning_rate": 9.971927545338222e-06, "loss": 17.6967, "step": 3433 }, { "epoch": 0.06277076059736414, "grad_norm": 8.73758303850346, "learning_rate": 9.971896213261913e-06, "loss": 18.295, "step": 3434 }, { "epoch": 0.06278903979381066, "grad_norm": 7.249676291582927, "learning_rate": 9.971864863759539e-06, "loss": 17.9134, "step": 3435 }, { "epoch": 0.06280731899025718, "grad_norm": 6.715105176458013, "learning_rate": 9.971833496831212e-06, "loss": 17.5182, "step": 3436 }, { "epoch": 0.06282559818670372, "grad_norm": 7.243179566201054, "learning_rate": 9.97180211247704e-06, "loss": 17.8722, "step": 3437 }, { "epoch": 0.06284387738315024, "grad_norm": 9.627863930721766, "learning_rate": 9.971770710697132e-06, "loss": 18.5971, "step": 3438 }, { "epoch": 0.06286215657959676, "grad_norm": 7.2162061993454545, "learning_rate": 9.9717392914916e-06, "loss": 17.6061, "step": 3439 }, { "epoch": 0.06288043577604328, "grad_norm": 7.9558665182817165, "learning_rate": 9.971707854860552e-06, "loss": 17.984, "step": 3440 }, { "epoch": 0.0628987149724898, "grad_norm": 7.062158461069705, "learning_rate": 9.9716764008041e-06, "loss": 17.6622, "step": 3441 }, { "epoch": 0.06291699416893633, "grad_norm": 8.35263761669116, "learning_rate": 9.971644929322352e-06, "loss": 18.0986, "step": 3442 }, { "epoch": 0.06293527336538286, "grad_norm": 6.708141528712058, "learning_rate": 9.971613440415423e-06, "loss": 17.6747, "step": 3443 }, { "epoch": 0.06295355256182938, "grad_norm": 8.084755472624233, "learning_rate": 9.971581934083419e-06, "loss": 18.0422, "step": 3444 }, { "epoch": 0.0629718317582759, "grad_norm": 6.789099319257001, "learning_rate": 9.971550410326452e-06, "loss": 17.6722, "step": 3445 }, { "epoch": 0.06299011095472243, "grad_norm": 8.053466806811533, "learning_rate": 9.971518869144632e-06, "loss": 18.0015, "step": 3446 }, { "epoch": 0.06300839015116895, "grad_norm": 7.713966160652883, "learning_rate": 9.971487310538068e-06, "loss": 17.8359, "step": 3447 }, { "epoch": 0.06302666934761549, "grad_norm": 6.868844053402038, "learning_rate": 9.971455734506875e-06, "loss": 17.6808, "step": 3448 }, { "epoch": 0.06304494854406201, "grad_norm": 7.828485730439966, "learning_rate": 9.97142414105116e-06, "loss": 18.2127, "step": 3449 }, { "epoch": 0.06306322774050853, "grad_norm": 7.415642665524057, "learning_rate": 9.971392530171034e-06, "loss": 17.7132, "step": 3450 }, { "epoch": 0.06308150693695505, "grad_norm": 8.8612358790264, "learning_rate": 9.971360901866609e-06, "loss": 18.7788, "step": 3451 }, { "epoch": 0.06309978613340157, "grad_norm": 7.967788386559646, "learning_rate": 9.971329256137996e-06, "loss": 17.5848, "step": 3452 }, { "epoch": 0.0631180653298481, "grad_norm": 7.6248665283854535, "learning_rate": 9.971297592985305e-06, "loss": 17.7204, "step": 3453 }, { "epoch": 0.06313634452629463, "grad_norm": 7.280807643531678, "learning_rate": 9.971265912408647e-06, "loss": 17.9504, "step": 3454 }, { "epoch": 0.06315462372274115, "grad_norm": 9.04288844435571, "learning_rate": 9.971234214408135e-06, "loss": 18.5277, "step": 3455 }, { "epoch": 0.06317290291918767, "grad_norm": 6.844101706341605, "learning_rate": 9.971202498983878e-06, "loss": 17.5276, "step": 3456 }, { "epoch": 0.0631911821156342, "grad_norm": 6.796439532736444, "learning_rate": 9.971170766135986e-06, "loss": 17.5491, "step": 3457 }, { "epoch": 0.06320946131208072, "grad_norm": 8.0182914134866, "learning_rate": 9.971139015864573e-06, "loss": 18.0436, "step": 3458 }, { "epoch": 0.06322774050852724, "grad_norm": 7.014998252217009, "learning_rate": 9.97110724816975e-06, "loss": 17.7785, "step": 3459 }, { "epoch": 0.06324601970497377, "grad_norm": 7.01435714493046, "learning_rate": 9.971075463051625e-06, "loss": 17.6574, "step": 3460 }, { "epoch": 0.0632642989014203, "grad_norm": 7.469032959375107, "learning_rate": 9.971043660510313e-06, "loss": 17.9465, "step": 3461 }, { "epoch": 0.06328257809786682, "grad_norm": 7.121677354722678, "learning_rate": 9.971011840545925e-06, "loss": 17.7377, "step": 3462 }, { "epoch": 0.06330085729431334, "grad_norm": 6.821687248846872, "learning_rate": 9.970980003158573e-06, "loss": 17.4453, "step": 3463 }, { "epoch": 0.06331913649075986, "grad_norm": 8.478505236474815, "learning_rate": 9.970948148348365e-06, "loss": 17.7499, "step": 3464 }, { "epoch": 0.0633374156872064, "grad_norm": 7.661870204257442, "learning_rate": 9.970916276115416e-06, "loss": 17.9667, "step": 3465 }, { "epoch": 0.06335569488365292, "grad_norm": 9.228132847856529, "learning_rate": 9.970884386459835e-06, "loss": 18.3447, "step": 3466 }, { "epoch": 0.06337397408009944, "grad_norm": 7.207955661898897, "learning_rate": 9.970852479381739e-06, "loss": 17.6804, "step": 3467 }, { "epoch": 0.06339225327654596, "grad_norm": 7.677179675585283, "learning_rate": 9.970820554881235e-06, "loss": 17.9184, "step": 3468 }, { "epoch": 0.06341053247299248, "grad_norm": 7.036440996411475, "learning_rate": 9.970788612958435e-06, "loss": 17.6884, "step": 3469 }, { "epoch": 0.063428811669439, "grad_norm": 6.1990578642071785, "learning_rate": 9.970756653613454e-06, "loss": 17.3292, "step": 3470 }, { "epoch": 0.06344709086588554, "grad_norm": 6.978394586225296, "learning_rate": 9.970724676846401e-06, "loss": 17.8265, "step": 3471 }, { "epoch": 0.06346537006233206, "grad_norm": 7.80215406458022, "learning_rate": 9.97069268265739e-06, "loss": 17.8772, "step": 3472 }, { "epoch": 0.06348364925877859, "grad_norm": 6.220208366230502, "learning_rate": 9.970660671046533e-06, "loss": 17.3685, "step": 3473 }, { "epoch": 0.0635019284552251, "grad_norm": 7.043690752149071, "learning_rate": 9.97062864201394e-06, "loss": 17.2635, "step": 3474 }, { "epoch": 0.06352020765167163, "grad_norm": 7.7375271177917435, "learning_rate": 9.970596595559727e-06, "loss": 17.8664, "step": 3475 }, { "epoch": 0.06353848684811815, "grad_norm": 6.803555219732546, "learning_rate": 9.970564531684005e-06, "loss": 17.731, "step": 3476 }, { "epoch": 0.06355676604456469, "grad_norm": 9.905881943875691, "learning_rate": 9.970532450386883e-06, "loss": 18.3341, "step": 3477 }, { "epoch": 0.06357504524101121, "grad_norm": 7.17266199032207, "learning_rate": 9.970500351668476e-06, "loss": 17.8205, "step": 3478 }, { "epoch": 0.06359332443745773, "grad_norm": 8.859449122632668, "learning_rate": 9.970468235528898e-06, "loss": 18.7001, "step": 3479 }, { "epoch": 0.06361160363390425, "grad_norm": 7.684843891034342, "learning_rate": 9.97043610196826e-06, "loss": 17.6728, "step": 3480 }, { "epoch": 0.06362988283035077, "grad_norm": 7.3613624030745965, "learning_rate": 9.970403950986675e-06, "loss": 17.8042, "step": 3481 }, { "epoch": 0.06364816202679731, "grad_norm": 7.933813741236401, "learning_rate": 9.970371782584254e-06, "loss": 17.8566, "step": 3482 }, { "epoch": 0.06366644122324383, "grad_norm": 7.20114050981594, "learning_rate": 9.970339596761113e-06, "loss": 17.7873, "step": 3483 }, { "epoch": 0.06368472041969035, "grad_norm": 6.539420780296843, "learning_rate": 9.970307393517363e-06, "loss": 17.6053, "step": 3484 }, { "epoch": 0.06370299961613687, "grad_norm": 7.198210037702214, "learning_rate": 9.970275172853116e-06, "loss": 17.8825, "step": 3485 }, { "epoch": 0.0637212788125834, "grad_norm": 6.423065344478874, "learning_rate": 9.970242934768486e-06, "loss": 17.1835, "step": 3486 }, { "epoch": 0.06373955800902992, "grad_norm": 9.592006472164796, "learning_rate": 9.970210679263585e-06, "loss": 18.8019, "step": 3487 }, { "epoch": 0.06375783720547645, "grad_norm": 8.381054912742002, "learning_rate": 9.970178406338528e-06, "loss": 17.8104, "step": 3488 }, { "epoch": 0.06377611640192297, "grad_norm": 6.2245779054760995, "learning_rate": 9.970146115993426e-06, "loss": 17.3555, "step": 3489 }, { "epoch": 0.0637943955983695, "grad_norm": 8.6187944119128, "learning_rate": 9.970113808228395e-06, "loss": 18.4059, "step": 3490 }, { "epoch": 0.06381267479481602, "grad_norm": 6.55115468548905, "learning_rate": 9.970081483043545e-06, "loss": 17.3778, "step": 3491 }, { "epoch": 0.06383095399126254, "grad_norm": 7.6721607603954, "learning_rate": 9.970049140438991e-06, "loss": 18.1595, "step": 3492 }, { "epoch": 0.06384923318770906, "grad_norm": 8.173171947955758, "learning_rate": 9.970016780414844e-06, "loss": 18.1178, "step": 3493 }, { "epoch": 0.0638675123841556, "grad_norm": 8.001892357122205, "learning_rate": 9.969984402971223e-06, "loss": 17.9552, "step": 3494 }, { "epoch": 0.06388579158060212, "grad_norm": 8.106211428069072, "learning_rate": 9.969952008108236e-06, "loss": 17.9411, "step": 3495 }, { "epoch": 0.06390407077704864, "grad_norm": 6.928555843489604, "learning_rate": 9.969919595825999e-06, "loss": 17.5931, "step": 3496 }, { "epoch": 0.06392234997349516, "grad_norm": 9.80360507054187, "learning_rate": 9.969887166124625e-06, "loss": 18.8677, "step": 3497 }, { "epoch": 0.06394062916994168, "grad_norm": 6.728350926935754, "learning_rate": 9.969854719004227e-06, "loss": 17.6105, "step": 3498 }, { "epoch": 0.06395890836638822, "grad_norm": 7.422988096770044, "learning_rate": 9.96982225446492e-06, "loss": 17.9216, "step": 3499 }, { "epoch": 0.06397718756283474, "grad_norm": 8.024234797780116, "learning_rate": 9.969789772506817e-06, "loss": 18.2083, "step": 3500 }, { "epoch": 0.06399546675928126, "grad_norm": 8.045249821909978, "learning_rate": 9.969757273130032e-06, "loss": 18.256, "step": 3501 }, { "epoch": 0.06401374595572779, "grad_norm": 7.986518270825566, "learning_rate": 9.96972475633468e-06, "loss": 18.283, "step": 3502 }, { "epoch": 0.06403202515217431, "grad_norm": 7.573442982924717, "learning_rate": 9.969692222120875e-06, "loss": 18.0723, "step": 3503 }, { "epoch": 0.06405030434862083, "grad_norm": 7.784550718432774, "learning_rate": 9.969659670488728e-06, "loss": 17.801, "step": 3504 }, { "epoch": 0.06406858354506736, "grad_norm": 8.831415996964335, "learning_rate": 9.969627101438356e-06, "loss": 17.9731, "step": 3505 }, { "epoch": 0.06408686274151389, "grad_norm": 7.652962288303948, "learning_rate": 9.969594514969871e-06, "loss": 17.7879, "step": 3506 }, { "epoch": 0.06410514193796041, "grad_norm": 7.566719122832205, "learning_rate": 9.96956191108339e-06, "loss": 17.9537, "step": 3507 }, { "epoch": 0.06412342113440693, "grad_norm": 6.850498608584895, "learning_rate": 9.969529289779024e-06, "loss": 17.5989, "step": 3508 }, { "epoch": 0.06414170033085345, "grad_norm": 7.642963557059988, "learning_rate": 9.96949665105689e-06, "loss": 18.1601, "step": 3509 }, { "epoch": 0.06415997952729997, "grad_norm": 6.92207305517525, "learning_rate": 9.9694639949171e-06, "loss": 17.8639, "step": 3510 }, { "epoch": 0.06417825872374651, "grad_norm": 6.9900197831105135, "learning_rate": 9.969431321359773e-06, "loss": 18.0146, "step": 3511 }, { "epoch": 0.06419653792019303, "grad_norm": 7.962592126136193, "learning_rate": 9.969398630385019e-06, "loss": 18.0781, "step": 3512 }, { "epoch": 0.06421481711663955, "grad_norm": 8.439722553655928, "learning_rate": 9.969365921992955e-06, "loss": 17.9793, "step": 3513 }, { "epoch": 0.06423309631308607, "grad_norm": 7.4717803840423835, "learning_rate": 9.969333196183693e-06, "loss": 17.9807, "step": 3514 }, { "epoch": 0.0642513755095326, "grad_norm": 7.821943658249452, "learning_rate": 9.96930045295735e-06, "loss": 18.2078, "step": 3515 }, { "epoch": 0.06426965470597913, "grad_norm": 7.15850318514372, "learning_rate": 9.969267692314039e-06, "loss": 17.624, "step": 3516 }, { "epoch": 0.06428793390242565, "grad_norm": 7.166171418224422, "learning_rate": 9.969234914253877e-06, "loss": 17.6763, "step": 3517 }, { "epoch": 0.06430621309887218, "grad_norm": 7.645142143798253, "learning_rate": 9.969202118776979e-06, "loss": 18.1204, "step": 3518 }, { "epoch": 0.0643244922953187, "grad_norm": 7.800160231317408, "learning_rate": 9.969169305883458e-06, "loss": 17.9998, "step": 3519 }, { "epoch": 0.06434277149176522, "grad_norm": 6.9924045145022955, "learning_rate": 9.969136475573429e-06, "loss": 17.7088, "step": 3520 }, { "epoch": 0.06436105068821174, "grad_norm": 7.876068104956508, "learning_rate": 9.969103627847008e-06, "loss": 17.8554, "step": 3521 }, { "epoch": 0.06437932988465828, "grad_norm": 7.203601413722107, "learning_rate": 9.969070762704311e-06, "loss": 17.8915, "step": 3522 }, { "epoch": 0.0643976090811048, "grad_norm": 7.71843516495048, "learning_rate": 9.96903788014545e-06, "loss": 18.3282, "step": 3523 }, { "epoch": 0.06441588827755132, "grad_norm": 7.691478339940075, "learning_rate": 9.969004980170546e-06, "loss": 18.2412, "step": 3524 }, { "epoch": 0.06443416747399784, "grad_norm": 6.741784960109834, "learning_rate": 9.968972062779708e-06, "loss": 17.612, "step": 3525 }, { "epoch": 0.06445244667044436, "grad_norm": 9.279574291796338, "learning_rate": 9.968939127973055e-06, "loss": 17.8691, "step": 3526 }, { "epoch": 0.06447072586689089, "grad_norm": 8.218887362358476, "learning_rate": 9.9689061757507e-06, "loss": 18.3941, "step": 3527 }, { "epoch": 0.06448900506333742, "grad_norm": 9.030415553386225, "learning_rate": 9.968873206112764e-06, "loss": 18.4235, "step": 3528 }, { "epoch": 0.06450728425978394, "grad_norm": 8.968290073544493, "learning_rate": 9.968840219059355e-06, "loss": 18.5045, "step": 3529 }, { "epoch": 0.06452556345623046, "grad_norm": 6.169962621475131, "learning_rate": 9.968807214590592e-06, "loss": 17.5488, "step": 3530 }, { "epoch": 0.06454384265267699, "grad_norm": 8.821595914720909, "learning_rate": 9.968774192706593e-06, "loss": 18.3084, "step": 3531 }, { "epoch": 0.06456212184912351, "grad_norm": 6.222977406102921, "learning_rate": 9.96874115340747e-06, "loss": 17.3397, "step": 3532 }, { "epoch": 0.06458040104557004, "grad_norm": 7.376944655592562, "learning_rate": 9.968708096693343e-06, "loss": 17.4941, "step": 3533 }, { "epoch": 0.06459868024201657, "grad_norm": 7.297119054188936, "learning_rate": 9.968675022564322e-06, "loss": 17.7489, "step": 3534 }, { "epoch": 0.06461695943846309, "grad_norm": 6.189290074236147, "learning_rate": 9.968641931020528e-06, "loss": 17.1373, "step": 3535 }, { "epoch": 0.06463523863490961, "grad_norm": 7.243176252691162, "learning_rate": 9.968608822062075e-06, "loss": 17.8784, "step": 3536 }, { "epoch": 0.06465351783135613, "grad_norm": 7.9600401586531495, "learning_rate": 9.968575695689078e-06, "loss": 18.1522, "step": 3537 }, { "epoch": 0.06467179702780265, "grad_norm": 8.094219192503523, "learning_rate": 9.968542551901657e-06, "loss": 18.0917, "step": 3538 }, { "epoch": 0.06469007622424919, "grad_norm": 6.900213326533915, "learning_rate": 9.968509390699923e-06, "loss": 17.6893, "step": 3539 }, { "epoch": 0.06470835542069571, "grad_norm": 7.771757318070501, "learning_rate": 9.968476212083994e-06, "loss": 17.6794, "step": 3540 }, { "epoch": 0.06472663461714223, "grad_norm": 7.796128646024294, "learning_rate": 9.96844301605399e-06, "loss": 18.0542, "step": 3541 }, { "epoch": 0.06474491381358875, "grad_norm": 6.333926310879374, "learning_rate": 9.968409802610024e-06, "loss": 17.492, "step": 3542 }, { "epoch": 0.06476319301003527, "grad_norm": 7.893493169383418, "learning_rate": 9.96837657175221e-06, "loss": 17.8904, "step": 3543 }, { "epoch": 0.0647814722064818, "grad_norm": 7.682323622852044, "learning_rate": 9.96834332348067e-06, "loss": 18.0747, "step": 3544 }, { "epoch": 0.06479975140292833, "grad_norm": 7.6343847131599825, "learning_rate": 9.968310057795516e-06, "loss": 17.9874, "step": 3545 }, { "epoch": 0.06481803059937485, "grad_norm": 7.70931668635223, "learning_rate": 9.968276774696867e-06, "loss": 18.1432, "step": 3546 }, { "epoch": 0.06483630979582138, "grad_norm": 7.042406319373183, "learning_rate": 9.96824347418484e-06, "loss": 17.7371, "step": 3547 }, { "epoch": 0.0648545889922679, "grad_norm": 7.032820555371767, "learning_rate": 9.96821015625955e-06, "loss": 17.6376, "step": 3548 }, { "epoch": 0.06487286818871442, "grad_norm": 8.465692286677994, "learning_rate": 9.968176820921113e-06, "loss": 18.5406, "step": 3549 }, { "epoch": 0.06489114738516095, "grad_norm": 8.325907663854666, "learning_rate": 9.968143468169651e-06, "loss": 17.8953, "step": 3550 }, { "epoch": 0.06490942658160748, "grad_norm": 6.391093030845671, "learning_rate": 9.968110098005274e-06, "loss": 17.4928, "step": 3551 }, { "epoch": 0.064927705778054, "grad_norm": 8.014652913343959, "learning_rate": 9.968076710428103e-06, "loss": 18.3309, "step": 3552 }, { "epoch": 0.06494598497450052, "grad_norm": 7.211976385444955, "learning_rate": 9.968043305438256e-06, "loss": 17.7902, "step": 3553 }, { "epoch": 0.06496426417094704, "grad_norm": 8.320694553004552, "learning_rate": 9.968009883035847e-06, "loss": 18.1843, "step": 3554 }, { "epoch": 0.06498254336739356, "grad_norm": 7.695379480450924, "learning_rate": 9.967976443220994e-06, "loss": 18.1156, "step": 3555 }, { "epoch": 0.0650008225638401, "grad_norm": 7.5360398867864555, "learning_rate": 9.967942985993815e-06, "loss": 17.8755, "step": 3556 }, { "epoch": 0.06501910176028662, "grad_norm": 7.394783515888027, "learning_rate": 9.967909511354427e-06, "loss": 17.8335, "step": 3557 }, { "epoch": 0.06503738095673314, "grad_norm": 6.878805886311911, "learning_rate": 9.967876019302947e-06, "loss": 17.758, "step": 3558 }, { "epoch": 0.06505566015317966, "grad_norm": 7.069124085978708, "learning_rate": 9.967842509839493e-06, "loss": 17.4327, "step": 3559 }, { "epoch": 0.06507393934962619, "grad_norm": 7.8435432108511085, "learning_rate": 9.967808982964183e-06, "loss": 17.4035, "step": 3560 }, { "epoch": 0.06509221854607271, "grad_norm": 8.897590660997398, "learning_rate": 9.967775438677131e-06, "loss": 18.4507, "step": 3561 }, { "epoch": 0.06511049774251924, "grad_norm": 7.665305953256472, "learning_rate": 9.967741876978459e-06, "loss": 17.8019, "step": 3562 }, { "epoch": 0.06512877693896577, "grad_norm": 7.7893858413132655, "learning_rate": 9.967708297868282e-06, "loss": 17.9123, "step": 3563 }, { "epoch": 0.06514705613541229, "grad_norm": 7.407263317066761, "learning_rate": 9.96767470134672e-06, "loss": 17.5609, "step": 3564 }, { "epoch": 0.06516533533185881, "grad_norm": 8.131104674091887, "learning_rate": 9.967641087413888e-06, "loss": 18.0436, "step": 3565 }, { "epoch": 0.06518361452830533, "grad_norm": 7.596401755151315, "learning_rate": 9.967607456069905e-06, "loss": 17.6687, "step": 3566 }, { "epoch": 0.06520189372475187, "grad_norm": 7.74646869203929, "learning_rate": 9.96757380731489e-06, "loss": 17.9164, "step": 3567 }, { "epoch": 0.06522017292119839, "grad_norm": 7.045659198787928, "learning_rate": 9.967540141148959e-06, "loss": 17.8673, "step": 3568 }, { "epoch": 0.06523845211764491, "grad_norm": 8.146337097205732, "learning_rate": 9.96750645757223e-06, "loss": 17.9246, "step": 3569 }, { "epoch": 0.06525673131409143, "grad_norm": 8.405984154371131, "learning_rate": 9.967472756584823e-06, "loss": 18.0451, "step": 3570 }, { "epoch": 0.06527501051053795, "grad_norm": 6.775109016848172, "learning_rate": 9.967439038186855e-06, "loss": 17.445, "step": 3571 }, { "epoch": 0.06529328970698448, "grad_norm": 7.400385393023171, "learning_rate": 9.967405302378444e-06, "loss": 17.738, "step": 3572 }, { "epoch": 0.06531156890343101, "grad_norm": 7.833313423241914, "learning_rate": 9.96737154915971e-06, "loss": 18.1323, "step": 3573 }, { "epoch": 0.06532984809987753, "grad_norm": 7.106764032621514, "learning_rate": 9.967337778530769e-06, "loss": 17.7625, "step": 3574 }, { "epoch": 0.06534812729632405, "grad_norm": 10.22666235137378, "learning_rate": 9.967303990491738e-06, "loss": 18.1572, "step": 3575 }, { "epoch": 0.06536640649277058, "grad_norm": 8.277992830655526, "learning_rate": 9.96727018504274e-06, "loss": 18.3004, "step": 3576 }, { "epoch": 0.0653846856892171, "grad_norm": 7.567160386383156, "learning_rate": 9.96723636218389e-06, "loss": 17.5543, "step": 3577 }, { "epoch": 0.06540296488566362, "grad_norm": 7.00449520450629, "learning_rate": 9.967202521915307e-06, "loss": 17.7054, "step": 3578 }, { "epoch": 0.06542124408211016, "grad_norm": 7.73123805219894, "learning_rate": 9.96716866423711e-06, "loss": 17.8919, "step": 3579 }, { "epoch": 0.06543952327855668, "grad_norm": 6.885900831776829, "learning_rate": 9.967134789149419e-06, "loss": 17.3299, "step": 3580 }, { "epoch": 0.0654578024750032, "grad_norm": 8.112229552955466, "learning_rate": 9.967100896652352e-06, "loss": 17.5914, "step": 3581 }, { "epoch": 0.06547608167144972, "grad_norm": 6.4199801399130765, "learning_rate": 9.967066986746026e-06, "loss": 17.2268, "step": 3582 }, { "epoch": 0.06549436086789624, "grad_norm": 7.675054421496131, "learning_rate": 9.967033059430562e-06, "loss": 17.9806, "step": 3583 }, { "epoch": 0.06551264006434278, "grad_norm": 7.478719143186732, "learning_rate": 9.96699911470608e-06, "loss": 17.7654, "step": 3584 }, { "epoch": 0.0655309192607893, "grad_norm": 8.405844893480488, "learning_rate": 9.966965152572694e-06, "loss": 18.1927, "step": 3585 }, { "epoch": 0.06554919845723582, "grad_norm": 7.736951161334362, "learning_rate": 9.966931173030528e-06, "loss": 18.1067, "step": 3586 }, { "epoch": 0.06556747765368234, "grad_norm": 7.903216192741018, "learning_rate": 9.9668971760797e-06, "loss": 17.7961, "step": 3587 }, { "epoch": 0.06558575685012887, "grad_norm": 8.324500951199788, "learning_rate": 9.966863161720326e-06, "loss": 18.0361, "step": 3588 }, { "epoch": 0.06560403604657539, "grad_norm": 6.013914722533633, "learning_rate": 9.966829129952528e-06, "loss": 17.18, "step": 3589 }, { "epoch": 0.06562231524302192, "grad_norm": 8.545617171751642, "learning_rate": 9.966795080776425e-06, "loss": 18.2204, "step": 3590 }, { "epoch": 0.06564059443946844, "grad_norm": 8.526968088011222, "learning_rate": 9.966761014192138e-06, "loss": 18.2618, "step": 3591 }, { "epoch": 0.06565887363591497, "grad_norm": 6.06931638243933, "learning_rate": 9.966726930199784e-06, "loss": 17.3711, "step": 3592 }, { "epoch": 0.06567715283236149, "grad_norm": 9.795067314208612, "learning_rate": 9.966692828799483e-06, "loss": 18.8647, "step": 3593 }, { "epoch": 0.06569543202880801, "grad_norm": 7.331816713163978, "learning_rate": 9.966658709991352e-06, "loss": 17.7088, "step": 3594 }, { "epoch": 0.06571371122525453, "grad_norm": 7.639954185195284, "learning_rate": 9.966624573775517e-06, "loss": 18.0959, "step": 3595 }, { "epoch": 0.06573199042170107, "grad_norm": 7.700168932923506, "learning_rate": 9.96659042015209e-06, "loss": 18.3082, "step": 3596 }, { "epoch": 0.06575026961814759, "grad_norm": 9.13690457500145, "learning_rate": 9.966556249121199e-06, "loss": 18.2918, "step": 3597 }, { "epoch": 0.06576854881459411, "grad_norm": 7.598414664686019, "learning_rate": 9.966522060682957e-06, "loss": 18.0308, "step": 3598 }, { "epoch": 0.06578682801104063, "grad_norm": 7.102651558654934, "learning_rate": 9.966487854837485e-06, "loss": 17.8665, "step": 3599 }, { "epoch": 0.06580510720748715, "grad_norm": 6.876939283695511, "learning_rate": 9.966453631584906e-06, "loss": 17.6633, "step": 3600 }, { "epoch": 0.06582338640393369, "grad_norm": 8.127901195429326, "learning_rate": 9.966419390925336e-06, "loss": 18.1879, "step": 3601 }, { "epoch": 0.06584166560038021, "grad_norm": 8.314206067549518, "learning_rate": 9.9663851328589e-06, "loss": 18.4616, "step": 3602 }, { "epoch": 0.06585994479682673, "grad_norm": 7.930742515215014, "learning_rate": 9.966350857385714e-06, "loss": 18.1745, "step": 3603 }, { "epoch": 0.06587822399327325, "grad_norm": 7.938715755344231, "learning_rate": 9.966316564505897e-06, "loss": 18.0051, "step": 3604 }, { "epoch": 0.06589650318971978, "grad_norm": 7.412150240046248, "learning_rate": 9.966282254219575e-06, "loss": 17.49, "step": 3605 }, { "epoch": 0.0659147823861663, "grad_norm": 8.901639329323402, "learning_rate": 9.966247926526862e-06, "loss": 18.5454, "step": 3606 }, { "epoch": 0.06593306158261283, "grad_norm": 7.632601527695011, "learning_rate": 9.96621358142788e-06, "loss": 18.0513, "step": 3607 }, { "epoch": 0.06595134077905936, "grad_norm": 7.813855987024707, "learning_rate": 9.966179218922754e-06, "loss": 17.5087, "step": 3608 }, { "epoch": 0.06596961997550588, "grad_norm": 7.983668250431304, "learning_rate": 9.966144839011597e-06, "loss": 18.0178, "step": 3609 }, { "epoch": 0.0659878991719524, "grad_norm": 7.489202970740367, "learning_rate": 9.966110441694536e-06, "loss": 17.7378, "step": 3610 }, { "epoch": 0.06600617836839892, "grad_norm": 6.415050654753606, "learning_rate": 9.966076026971688e-06, "loss": 17.3449, "step": 3611 }, { "epoch": 0.06602445756484544, "grad_norm": 7.864728428559402, "learning_rate": 9.966041594843175e-06, "loss": 18.1784, "step": 3612 }, { "epoch": 0.06604273676129198, "grad_norm": 7.576611071022064, "learning_rate": 9.966007145309115e-06, "loss": 17.6168, "step": 3613 }, { "epoch": 0.0660610159577385, "grad_norm": 7.670906182898218, "learning_rate": 9.965972678369633e-06, "loss": 18.0569, "step": 3614 }, { "epoch": 0.06607929515418502, "grad_norm": 7.881212668285415, "learning_rate": 9.965938194024846e-06, "loss": 18.0989, "step": 3615 }, { "epoch": 0.06609757435063154, "grad_norm": 7.617339270567452, "learning_rate": 9.965903692274878e-06, "loss": 18.0277, "step": 3616 }, { "epoch": 0.06611585354707807, "grad_norm": 7.220682379213172, "learning_rate": 9.965869173119849e-06, "loss": 17.8478, "step": 3617 }, { "epoch": 0.0661341327435246, "grad_norm": 7.9974297402963614, "learning_rate": 9.96583463655988e-06, "loss": 18.1157, "step": 3618 }, { "epoch": 0.06615241193997112, "grad_norm": 8.25383357327032, "learning_rate": 9.96580008259509e-06, "loss": 18.1823, "step": 3619 }, { "epoch": 0.06617069113641764, "grad_norm": 9.085583064693404, "learning_rate": 9.9657655112256e-06, "loss": 18.2514, "step": 3620 }, { "epoch": 0.06618897033286417, "grad_norm": 8.827278744364282, "learning_rate": 9.965730922451535e-06, "loss": 18.6595, "step": 3621 }, { "epoch": 0.06620724952931069, "grad_norm": 6.40800024285416, "learning_rate": 9.965696316273013e-06, "loss": 17.4722, "step": 3622 }, { "epoch": 0.06622552872575721, "grad_norm": 7.322330542321388, "learning_rate": 9.965661692690158e-06, "loss": 17.9229, "step": 3623 }, { "epoch": 0.06624380792220375, "grad_norm": 8.63134294616613, "learning_rate": 9.965627051703088e-06, "loss": 18.2308, "step": 3624 }, { "epoch": 0.06626208711865027, "grad_norm": 8.221315315450456, "learning_rate": 9.965592393311927e-06, "loss": 18.0214, "step": 3625 }, { "epoch": 0.06628036631509679, "grad_norm": 7.948911081925447, "learning_rate": 9.965557717516794e-06, "loss": 17.9538, "step": 3626 }, { "epoch": 0.06629864551154331, "grad_norm": 7.98964185910891, "learning_rate": 9.965523024317814e-06, "loss": 18.4565, "step": 3627 }, { "epoch": 0.06631692470798983, "grad_norm": 6.571445289451289, "learning_rate": 9.965488313715107e-06, "loss": 17.5406, "step": 3628 }, { "epoch": 0.06633520390443635, "grad_norm": 7.668087049288176, "learning_rate": 9.965453585708791e-06, "loss": 17.8423, "step": 3629 }, { "epoch": 0.06635348310088289, "grad_norm": 9.127646705308155, "learning_rate": 9.965418840298995e-06, "loss": 18.2477, "step": 3630 }, { "epoch": 0.06637176229732941, "grad_norm": 7.320556338144031, "learning_rate": 9.965384077485834e-06, "loss": 17.6702, "step": 3631 }, { "epoch": 0.06639004149377593, "grad_norm": 8.18728420801698, "learning_rate": 9.965349297269435e-06, "loss": 18.4822, "step": 3632 }, { "epoch": 0.06640832069022246, "grad_norm": 8.238194702977536, "learning_rate": 9.965314499649917e-06, "loss": 17.9435, "step": 3633 }, { "epoch": 0.06642659988666898, "grad_norm": 7.770663751371553, "learning_rate": 9.9652796846274e-06, "loss": 17.8512, "step": 3634 }, { "epoch": 0.06644487908311551, "grad_norm": 6.931472580092162, "learning_rate": 9.965244852202011e-06, "loss": 17.6323, "step": 3635 }, { "epoch": 0.06646315827956203, "grad_norm": 7.978645203749096, "learning_rate": 9.965210002373869e-06, "loss": 18.0182, "step": 3636 }, { "epoch": 0.06648143747600856, "grad_norm": 7.060176683857077, "learning_rate": 9.965175135143098e-06, "loss": 17.7228, "step": 3637 }, { "epoch": 0.06649971667245508, "grad_norm": 6.91300065249346, "learning_rate": 9.965140250509818e-06, "loss": 18.0372, "step": 3638 }, { "epoch": 0.0665179958689016, "grad_norm": 7.198185264977327, "learning_rate": 9.965105348474153e-06, "loss": 17.6783, "step": 3639 }, { "epoch": 0.06653627506534812, "grad_norm": 7.877918246822304, "learning_rate": 9.965070429036223e-06, "loss": 18.0394, "step": 3640 }, { "epoch": 0.06655455426179466, "grad_norm": 7.018533476143148, "learning_rate": 9.965035492196154e-06, "loss": 17.5641, "step": 3641 }, { "epoch": 0.06657283345824118, "grad_norm": 7.8852153569181525, "learning_rate": 9.965000537954064e-06, "loss": 17.7846, "step": 3642 }, { "epoch": 0.0665911126546877, "grad_norm": 7.746320258295745, "learning_rate": 9.96496556631008e-06, "loss": 17.9326, "step": 3643 }, { "epoch": 0.06660939185113422, "grad_norm": 7.22849019265795, "learning_rate": 9.964930577264323e-06, "loss": 17.7381, "step": 3644 }, { "epoch": 0.06662767104758074, "grad_norm": 7.310177395722805, "learning_rate": 9.964895570816912e-06, "loss": 17.9833, "step": 3645 }, { "epoch": 0.06664595024402727, "grad_norm": 6.936029700317219, "learning_rate": 9.964860546967976e-06, "loss": 17.7155, "step": 3646 }, { "epoch": 0.0666642294404738, "grad_norm": 7.711533693081667, "learning_rate": 9.964825505717633e-06, "loss": 18.1309, "step": 3647 }, { "epoch": 0.06668250863692032, "grad_norm": 7.779978389906678, "learning_rate": 9.964790447066008e-06, "loss": 17.9141, "step": 3648 }, { "epoch": 0.06670078783336685, "grad_norm": 7.150644276614255, "learning_rate": 9.964755371013225e-06, "loss": 17.8392, "step": 3649 }, { "epoch": 0.06671906702981337, "grad_norm": 8.12025679040642, "learning_rate": 9.964720277559401e-06, "loss": 18.2699, "step": 3650 }, { "epoch": 0.06673734622625989, "grad_norm": 7.177619422772814, "learning_rate": 9.964685166704666e-06, "loss": 17.6499, "step": 3651 }, { "epoch": 0.06675562542270642, "grad_norm": 6.934823447479643, "learning_rate": 9.964650038449139e-06, "loss": 17.798, "step": 3652 }, { "epoch": 0.06677390461915295, "grad_norm": 6.407166485372936, "learning_rate": 9.964614892792948e-06, "loss": 17.2369, "step": 3653 }, { "epoch": 0.06679218381559947, "grad_norm": 8.633064407304595, "learning_rate": 9.96457972973621e-06, "loss": 18.3923, "step": 3654 }, { "epoch": 0.06681046301204599, "grad_norm": 7.686353050424286, "learning_rate": 9.96454454927905e-06, "loss": 17.9912, "step": 3655 }, { "epoch": 0.06682874220849251, "grad_norm": 7.543979592879128, "learning_rate": 9.964509351421593e-06, "loss": 17.9041, "step": 3656 }, { "epoch": 0.06684702140493903, "grad_norm": 7.682844748231194, "learning_rate": 9.964474136163961e-06, "loss": 18.1909, "step": 3657 }, { "epoch": 0.06686530060138557, "grad_norm": 7.542047685677389, "learning_rate": 9.96443890350628e-06, "loss": 17.5756, "step": 3658 }, { "epoch": 0.06688357979783209, "grad_norm": 7.144669916419882, "learning_rate": 9.964403653448669e-06, "loss": 17.7512, "step": 3659 }, { "epoch": 0.06690185899427861, "grad_norm": 7.516784141610715, "learning_rate": 9.964368385991254e-06, "loss": 17.7277, "step": 3660 }, { "epoch": 0.06692013819072513, "grad_norm": 6.809284532807876, "learning_rate": 9.964333101134161e-06, "loss": 17.6542, "step": 3661 }, { "epoch": 0.06693841738717166, "grad_norm": 8.016323300616692, "learning_rate": 9.964297798877509e-06, "loss": 17.992, "step": 3662 }, { "epoch": 0.06695669658361818, "grad_norm": 7.236700239267418, "learning_rate": 9.964262479221424e-06, "loss": 17.6687, "step": 3663 }, { "epoch": 0.06697497578006471, "grad_norm": 8.445816640265996, "learning_rate": 9.964227142166033e-06, "loss": 18.187, "step": 3664 }, { "epoch": 0.06699325497651124, "grad_norm": 6.898403266475036, "learning_rate": 9.964191787711453e-06, "loss": 17.3689, "step": 3665 }, { "epoch": 0.06701153417295776, "grad_norm": 7.603489925944102, "learning_rate": 9.964156415857813e-06, "loss": 18.1952, "step": 3666 }, { "epoch": 0.06702981336940428, "grad_norm": 7.130434251094245, "learning_rate": 9.964121026605236e-06, "loss": 17.8387, "step": 3667 }, { "epoch": 0.0670480925658508, "grad_norm": 7.173812988187238, "learning_rate": 9.964085619953845e-06, "loss": 17.7426, "step": 3668 }, { "epoch": 0.06706637176229734, "grad_norm": 7.663182796601119, "learning_rate": 9.964050195903764e-06, "loss": 17.9353, "step": 3669 }, { "epoch": 0.06708465095874386, "grad_norm": 7.781673745246008, "learning_rate": 9.964014754455117e-06, "loss": 17.958, "step": 3670 }, { "epoch": 0.06710293015519038, "grad_norm": 6.557163244961542, "learning_rate": 9.963979295608031e-06, "loss": 17.3812, "step": 3671 }, { "epoch": 0.0671212093516369, "grad_norm": 8.194036113397624, "learning_rate": 9.963943819362628e-06, "loss": 18.3379, "step": 3672 }, { "epoch": 0.06713948854808342, "grad_norm": 7.0195021885865065, "learning_rate": 9.963908325719034e-06, "loss": 17.6027, "step": 3673 }, { "epoch": 0.06715776774452994, "grad_norm": 6.562676858630681, "learning_rate": 9.963872814677369e-06, "loss": 17.4976, "step": 3674 }, { "epoch": 0.06717604694097648, "grad_norm": 7.478989990416041, "learning_rate": 9.963837286237763e-06, "loss": 17.799, "step": 3675 }, { "epoch": 0.067194326137423, "grad_norm": 8.057254508520662, "learning_rate": 9.963801740400336e-06, "loss": 18.1576, "step": 3676 }, { "epoch": 0.06721260533386952, "grad_norm": 7.333838327509158, "learning_rate": 9.963766177165216e-06, "loss": 17.8813, "step": 3677 }, { "epoch": 0.06723088453031605, "grad_norm": 7.007514023860895, "learning_rate": 9.963730596532526e-06, "loss": 17.8473, "step": 3678 }, { "epoch": 0.06724916372676257, "grad_norm": 8.248034560183765, "learning_rate": 9.963694998502392e-06, "loss": 17.9892, "step": 3679 }, { "epoch": 0.06726744292320909, "grad_norm": 7.664262096770969, "learning_rate": 9.963659383074937e-06, "loss": 17.9312, "step": 3680 }, { "epoch": 0.06728572211965562, "grad_norm": 7.487524861207348, "learning_rate": 9.963623750250285e-06, "loss": 17.9139, "step": 3681 }, { "epoch": 0.06730400131610215, "grad_norm": 7.707866711653239, "learning_rate": 9.963588100028565e-06, "loss": 17.8018, "step": 3682 }, { "epoch": 0.06732228051254867, "grad_norm": 7.78447837652025, "learning_rate": 9.963552432409897e-06, "loss": 17.9661, "step": 3683 }, { "epoch": 0.06734055970899519, "grad_norm": 8.626494614214739, "learning_rate": 9.963516747394411e-06, "loss": 18.0817, "step": 3684 }, { "epoch": 0.06735883890544171, "grad_norm": 7.607945441358762, "learning_rate": 9.963481044982227e-06, "loss": 18.0393, "step": 3685 }, { "epoch": 0.06737711810188825, "grad_norm": 6.902426308636471, "learning_rate": 9.963445325173473e-06, "loss": 17.4743, "step": 3686 }, { "epoch": 0.06739539729833477, "grad_norm": 8.311685125637775, "learning_rate": 9.963409587968276e-06, "loss": 17.8539, "step": 3687 }, { "epoch": 0.06741367649478129, "grad_norm": 7.048286794631574, "learning_rate": 9.963373833366758e-06, "loss": 18.0255, "step": 3688 }, { "epoch": 0.06743195569122781, "grad_norm": 7.1412325068410105, "learning_rate": 9.963338061369045e-06, "loss": 18.0242, "step": 3689 }, { "epoch": 0.06745023488767433, "grad_norm": 7.348625419574826, "learning_rate": 9.963302271975263e-06, "loss": 17.469, "step": 3690 }, { "epoch": 0.06746851408412086, "grad_norm": 16.995275294386243, "learning_rate": 9.963266465185536e-06, "loss": 18.8894, "step": 3691 }, { "epoch": 0.06748679328056739, "grad_norm": 7.533170174551889, "learning_rate": 9.963230640999993e-06, "loss": 17.7024, "step": 3692 }, { "epoch": 0.06750507247701391, "grad_norm": 7.248560779489402, "learning_rate": 9.963194799418755e-06, "loss": 17.7748, "step": 3693 }, { "epoch": 0.06752335167346044, "grad_norm": 8.587210185826397, "learning_rate": 9.963158940441953e-06, "loss": 18.3314, "step": 3694 }, { "epoch": 0.06754163086990696, "grad_norm": 8.66856194071746, "learning_rate": 9.963123064069707e-06, "loss": 18.6316, "step": 3695 }, { "epoch": 0.06755991006635348, "grad_norm": 6.8005485687612275, "learning_rate": 9.963087170302146e-06, "loss": 17.366, "step": 3696 }, { "epoch": 0.0675781892628, "grad_norm": 7.590585740008545, "learning_rate": 9.963051259139395e-06, "loss": 17.9532, "step": 3697 }, { "epoch": 0.06759646845924654, "grad_norm": 7.415068222981645, "learning_rate": 9.963015330581581e-06, "loss": 17.8159, "step": 3698 }, { "epoch": 0.06761474765569306, "grad_norm": 7.445670101225243, "learning_rate": 9.962979384628829e-06, "loss": 17.7052, "step": 3699 }, { "epoch": 0.06763302685213958, "grad_norm": 6.826785358678163, "learning_rate": 9.962943421281264e-06, "loss": 17.737, "step": 3700 }, { "epoch": 0.0676513060485861, "grad_norm": 7.432857338679918, "learning_rate": 9.962907440539012e-06, "loss": 18.2263, "step": 3701 }, { "epoch": 0.06766958524503262, "grad_norm": 6.261111150705873, "learning_rate": 9.962871442402203e-06, "loss": 17.2048, "step": 3702 }, { "epoch": 0.06768786444147916, "grad_norm": 7.454775341619104, "learning_rate": 9.962835426870958e-06, "loss": 17.999, "step": 3703 }, { "epoch": 0.06770614363792568, "grad_norm": 7.56542599926941, "learning_rate": 9.962799393945406e-06, "loss": 18.0887, "step": 3704 }, { "epoch": 0.0677244228343722, "grad_norm": 7.967020769632536, "learning_rate": 9.962763343625672e-06, "loss": 17.9031, "step": 3705 }, { "epoch": 0.06774270203081872, "grad_norm": 7.667758830754452, "learning_rate": 9.962727275911883e-06, "loss": 17.941, "step": 3706 }, { "epoch": 0.06776098122726525, "grad_norm": 7.1873448141762335, "learning_rate": 9.962691190804167e-06, "loss": 17.7057, "step": 3707 }, { "epoch": 0.06777926042371177, "grad_norm": 7.984551617788175, "learning_rate": 9.962655088302648e-06, "loss": 18.1008, "step": 3708 }, { "epoch": 0.0677975396201583, "grad_norm": 7.350720256152015, "learning_rate": 9.962618968407455e-06, "loss": 17.7906, "step": 3709 }, { "epoch": 0.06781581881660483, "grad_norm": 8.235914722238629, "learning_rate": 9.962582831118712e-06, "loss": 18.0387, "step": 3710 }, { "epoch": 0.06783409801305135, "grad_norm": 7.3465049296915215, "learning_rate": 9.962546676436547e-06, "loss": 18.0271, "step": 3711 }, { "epoch": 0.06785237720949787, "grad_norm": 9.091010809250884, "learning_rate": 9.962510504361087e-06, "loss": 18.4861, "step": 3712 }, { "epoch": 0.06787065640594439, "grad_norm": 6.606151229758446, "learning_rate": 9.962474314892456e-06, "loss": 17.4758, "step": 3713 }, { "epoch": 0.06788893560239091, "grad_norm": 8.364753569679415, "learning_rate": 9.962438108030786e-06, "loss": 18.0549, "step": 3714 }, { "epoch": 0.06790721479883745, "grad_norm": 5.8204707977722965, "learning_rate": 9.9624018837762e-06, "loss": 17.1909, "step": 3715 }, { "epoch": 0.06792549399528397, "grad_norm": 8.532616712133498, "learning_rate": 9.962365642128825e-06, "loss": 18.3254, "step": 3716 }, { "epoch": 0.06794377319173049, "grad_norm": 7.6865609996747155, "learning_rate": 9.962329383088791e-06, "loss": 18.0385, "step": 3717 }, { "epoch": 0.06796205238817701, "grad_norm": 8.483761109157722, "learning_rate": 9.962293106656222e-06, "loss": 18.2805, "step": 3718 }, { "epoch": 0.06798033158462354, "grad_norm": 7.960116500492674, "learning_rate": 9.962256812831245e-06, "loss": 18.2548, "step": 3719 }, { "epoch": 0.06799861078107007, "grad_norm": 7.351339077773364, "learning_rate": 9.962220501613991e-06, "loss": 17.7659, "step": 3720 }, { "epoch": 0.06801688997751659, "grad_norm": 7.306086424667269, "learning_rate": 9.962184173004583e-06, "loss": 17.9074, "step": 3721 }, { "epoch": 0.06803516917396311, "grad_norm": 7.366303798764439, "learning_rate": 9.962147827003152e-06, "loss": 17.5614, "step": 3722 }, { "epoch": 0.06805344837040964, "grad_norm": 7.477827781803198, "learning_rate": 9.962111463609822e-06, "loss": 17.7797, "step": 3723 }, { "epoch": 0.06807172756685616, "grad_norm": 8.984259687353106, "learning_rate": 9.962075082824722e-06, "loss": 18.0538, "step": 3724 }, { "epoch": 0.06809000676330268, "grad_norm": 6.958962392731023, "learning_rate": 9.96203868464798e-06, "loss": 17.5696, "step": 3725 }, { "epoch": 0.06810828595974922, "grad_norm": 6.8283761609229074, "learning_rate": 9.962002269079722e-06, "loss": 17.5826, "step": 3726 }, { "epoch": 0.06812656515619574, "grad_norm": 7.9269979599067355, "learning_rate": 9.961965836120076e-06, "loss": 17.9868, "step": 3727 }, { "epoch": 0.06814484435264226, "grad_norm": 7.339234417282792, "learning_rate": 9.961929385769172e-06, "loss": 17.7234, "step": 3728 }, { "epoch": 0.06816312354908878, "grad_norm": 7.82796076854723, "learning_rate": 9.961892918027136e-06, "loss": 18.6493, "step": 3729 }, { "epoch": 0.0681814027455353, "grad_norm": 7.671431642947557, "learning_rate": 9.961856432894096e-06, "loss": 17.8904, "step": 3730 }, { "epoch": 0.06819968194198182, "grad_norm": 8.118278044873199, "learning_rate": 9.961819930370178e-06, "loss": 18.332, "step": 3731 }, { "epoch": 0.06821796113842836, "grad_norm": 7.163604470953865, "learning_rate": 9.961783410455514e-06, "loss": 17.7846, "step": 3732 }, { "epoch": 0.06823624033487488, "grad_norm": 7.088079444102592, "learning_rate": 9.961746873150228e-06, "loss": 17.6912, "step": 3733 }, { "epoch": 0.0682545195313214, "grad_norm": 8.259109711126358, "learning_rate": 9.96171031845445e-06, "loss": 18.3023, "step": 3734 }, { "epoch": 0.06827279872776792, "grad_norm": 6.390717546983588, "learning_rate": 9.961673746368307e-06, "loss": 17.4842, "step": 3735 }, { "epoch": 0.06829107792421445, "grad_norm": 8.54189722197786, "learning_rate": 9.961637156891931e-06, "loss": 18.6318, "step": 3736 }, { "epoch": 0.06830935712066098, "grad_norm": 8.003033235527171, "learning_rate": 9.961600550025445e-06, "loss": 18.1477, "step": 3737 }, { "epoch": 0.0683276363171075, "grad_norm": 6.786485587457048, "learning_rate": 9.961563925768981e-06, "loss": 17.5007, "step": 3738 }, { "epoch": 0.06834591551355403, "grad_norm": 9.1781443473539, "learning_rate": 9.961527284122665e-06, "loss": 18.4901, "step": 3739 }, { "epoch": 0.06836419471000055, "grad_norm": 6.747170850842066, "learning_rate": 9.961490625086627e-06, "loss": 17.6464, "step": 3740 }, { "epoch": 0.06838247390644707, "grad_norm": 9.24932579666635, "learning_rate": 9.961453948660995e-06, "loss": 18.4559, "step": 3741 }, { "epoch": 0.06840075310289359, "grad_norm": 8.075774718250974, "learning_rate": 9.961417254845897e-06, "loss": 18.0175, "step": 3742 }, { "epoch": 0.06841903229934013, "grad_norm": 7.389772889445342, "learning_rate": 9.961380543641461e-06, "loss": 17.7848, "step": 3743 }, { "epoch": 0.06843731149578665, "grad_norm": 7.2004802013132885, "learning_rate": 9.961343815047818e-06, "loss": 17.598, "step": 3744 }, { "epoch": 0.06845559069223317, "grad_norm": 8.669813104855471, "learning_rate": 9.961307069065096e-06, "loss": 18.007, "step": 3745 }, { "epoch": 0.06847386988867969, "grad_norm": 7.563564649745429, "learning_rate": 9.961270305693422e-06, "loss": 17.961, "step": 3746 }, { "epoch": 0.06849214908512621, "grad_norm": 7.855342442049126, "learning_rate": 9.961233524932927e-06, "loss": 18.0859, "step": 3747 }, { "epoch": 0.06851042828157274, "grad_norm": 6.835256251563989, "learning_rate": 9.961196726783738e-06, "loss": 17.5802, "step": 3748 }, { "epoch": 0.06852870747801927, "grad_norm": 9.09076091118749, "learning_rate": 9.961159911245986e-06, "loss": 18.1888, "step": 3749 }, { "epoch": 0.06854698667446579, "grad_norm": 8.082008770565626, "learning_rate": 9.961123078319798e-06, "loss": 18.0489, "step": 3750 }, { "epoch": 0.06856526587091231, "grad_norm": 7.637977939444455, "learning_rate": 9.961086228005305e-06, "loss": 17.9384, "step": 3751 }, { "epoch": 0.06858354506735884, "grad_norm": 8.208068112705568, "learning_rate": 9.961049360302634e-06, "loss": 18.1634, "step": 3752 }, { "epoch": 0.06860182426380536, "grad_norm": 7.719268232487972, "learning_rate": 9.961012475211917e-06, "loss": 18.1333, "step": 3753 }, { "epoch": 0.0686201034602519, "grad_norm": 7.367828581028043, "learning_rate": 9.960975572733282e-06, "loss": 17.7244, "step": 3754 }, { "epoch": 0.06863838265669842, "grad_norm": 7.456793050592895, "learning_rate": 9.960938652866857e-06, "loss": 17.7094, "step": 3755 }, { "epoch": 0.06865666185314494, "grad_norm": 7.800264409200894, "learning_rate": 9.960901715612774e-06, "loss": 18.1531, "step": 3756 }, { "epoch": 0.06867494104959146, "grad_norm": 6.7314809857282665, "learning_rate": 9.960864760971159e-06, "loss": 17.6805, "step": 3757 }, { "epoch": 0.06869322024603798, "grad_norm": 7.6788020934336405, "learning_rate": 9.960827788942144e-06, "loss": 17.6742, "step": 3758 }, { "epoch": 0.0687114994424845, "grad_norm": 8.146205801876379, "learning_rate": 9.96079079952586e-06, "loss": 18.6369, "step": 3759 }, { "epoch": 0.06872977863893104, "grad_norm": 8.26115784078957, "learning_rate": 9.960753792722432e-06, "loss": 18.5603, "step": 3760 }, { "epoch": 0.06874805783537756, "grad_norm": 9.019812898584123, "learning_rate": 9.960716768531993e-06, "loss": 18.0269, "step": 3761 }, { "epoch": 0.06876633703182408, "grad_norm": 6.465686684918821, "learning_rate": 9.960679726954674e-06, "loss": 17.5717, "step": 3762 }, { "epoch": 0.0687846162282706, "grad_norm": 8.125270644039192, "learning_rate": 9.960642667990601e-06, "loss": 18.1933, "step": 3763 }, { "epoch": 0.06880289542471713, "grad_norm": 8.771975936087479, "learning_rate": 9.960605591639908e-06, "loss": 18.4901, "step": 3764 }, { "epoch": 0.06882117462116365, "grad_norm": 6.339127645571222, "learning_rate": 9.96056849790272e-06, "loss": 17.5171, "step": 3765 }, { "epoch": 0.06883945381761018, "grad_norm": 6.245842990050287, "learning_rate": 9.960531386779173e-06, "loss": 17.5162, "step": 3766 }, { "epoch": 0.0688577330140567, "grad_norm": 7.009487915994345, "learning_rate": 9.960494258269391e-06, "loss": 17.6565, "step": 3767 }, { "epoch": 0.06887601221050323, "grad_norm": 8.15864628189637, "learning_rate": 9.96045711237351e-06, "loss": 17.8422, "step": 3768 }, { "epoch": 0.06889429140694975, "grad_norm": 8.073448513013414, "learning_rate": 9.960419949091655e-06, "loss": 18.0036, "step": 3769 }, { "epoch": 0.06891257060339627, "grad_norm": 7.674304767211397, "learning_rate": 9.96038276842396e-06, "loss": 18.26, "step": 3770 }, { "epoch": 0.0689308497998428, "grad_norm": 7.740456753546985, "learning_rate": 9.960345570370552e-06, "loss": 18.2854, "step": 3771 }, { "epoch": 0.06894912899628933, "grad_norm": 7.139536987911921, "learning_rate": 9.960308354931564e-06, "loss": 17.8847, "step": 3772 }, { "epoch": 0.06896740819273585, "grad_norm": 6.721470384251413, "learning_rate": 9.960271122107127e-06, "loss": 17.5681, "step": 3773 }, { "epoch": 0.06898568738918237, "grad_norm": 7.326324475271985, "learning_rate": 9.96023387189737e-06, "loss": 17.8968, "step": 3774 }, { "epoch": 0.06900396658562889, "grad_norm": 7.8901041168503045, "learning_rate": 9.960196604302421e-06, "loss": 17.9237, "step": 3775 }, { "epoch": 0.06902224578207541, "grad_norm": 9.181082980181333, "learning_rate": 9.960159319322415e-06, "loss": 18.29, "step": 3776 }, { "epoch": 0.06904052497852195, "grad_norm": 7.311520840686915, "learning_rate": 9.960122016957483e-06, "loss": 17.7929, "step": 3777 }, { "epoch": 0.06905880417496847, "grad_norm": 6.928751391376276, "learning_rate": 9.96008469720775e-06, "loss": 17.477, "step": 3778 }, { "epoch": 0.069077083371415, "grad_norm": 8.172636126545248, "learning_rate": 9.960047360073353e-06, "loss": 18.1091, "step": 3779 }, { "epoch": 0.06909536256786152, "grad_norm": 8.057727424639237, "learning_rate": 9.96001000555442e-06, "loss": 17.8632, "step": 3780 }, { "epoch": 0.06911364176430804, "grad_norm": 7.684672935232596, "learning_rate": 9.959972633651083e-06, "loss": 18.0447, "step": 3781 }, { "epoch": 0.06913192096075456, "grad_norm": 7.216855034027728, "learning_rate": 9.95993524436347e-06, "loss": 17.7419, "step": 3782 }, { "epoch": 0.0691502001572011, "grad_norm": 7.853531177231895, "learning_rate": 9.959897837691717e-06, "loss": 18.1424, "step": 3783 }, { "epoch": 0.06916847935364762, "grad_norm": 7.374683926499741, "learning_rate": 9.959860413635951e-06, "loss": 17.782, "step": 3784 }, { "epoch": 0.06918675855009414, "grad_norm": 6.963507141610772, "learning_rate": 9.959822972196306e-06, "loss": 17.3137, "step": 3785 }, { "epoch": 0.06920503774654066, "grad_norm": 7.795562780346318, "learning_rate": 9.95978551337291e-06, "loss": 17.886, "step": 3786 }, { "epoch": 0.06922331694298718, "grad_norm": 8.110133946860046, "learning_rate": 9.959748037165897e-06, "loss": 18.2596, "step": 3787 }, { "epoch": 0.06924159613943372, "grad_norm": 7.645011338232504, "learning_rate": 9.959710543575397e-06, "loss": 17.8402, "step": 3788 }, { "epoch": 0.06925987533588024, "grad_norm": 6.859118651318195, "learning_rate": 9.959673032601544e-06, "loss": 17.7896, "step": 3789 }, { "epoch": 0.06927815453232676, "grad_norm": 7.007663177435595, "learning_rate": 9.959635504244466e-06, "loss": 17.732, "step": 3790 }, { "epoch": 0.06929643372877328, "grad_norm": 7.4868070815628815, "learning_rate": 9.959597958504296e-06, "loss": 17.9148, "step": 3791 }, { "epoch": 0.0693147129252198, "grad_norm": 6.576444156646345, "learning_rate": 9.959560395381166e-06, "loss": 17.5448, "step": 3792 }, { "epoch": 0.06933299212166633, "grad_norm": 6.707517850683992, "learning_rate": 9.959522814875206e-06, "loss": 17.8627, "step": 3793 }, { "epoch": 0.06935127131811286, "grad_norm": 6.707322458438496, "learning_rate": 9.959485216986548e-06, "loss": 17.4698, "step": 3794 }, { "epoch": 0.06936955051455938, "grad_norm": 7.485522659872726, "learning_rate": 9.959447601715328e-06, "loss": 17.8513, "step": 3795 }, { "epoch": 0.0693878297110059, "grad_norm": 7.275228060747942, "learning_rate": 9.959409969061673e-06, "loss": 17.8599, "step": 3796 }, { "epoch": 0.06940610890745243, "grad_norm": 7.809365144659854, "learning_rate": 9.959372319025715e-06, "loss": 17.8961, "step": 3797 }, { "epoch": 0.06942438810389895, "grad_norm": 6.528049873087958, "learning_rate": 9.959334651607589e-06, "loss": 17.2796, "step": 3798 }, { "epoch": 0.06944266730034547, "grad_norm": 6.793507717491632, "learning_rate": 9.959296966807425e-06, "loss": 17.607, "step": 3799 }, { "epoch": 0.069460946496792, "grad_norm": 5.979708116887712, "learning_rate": 9.959259264625355e-06, "loss": 17.1363, "step": 3800 }, { "epoch": 0.06947922569323853, "grad_norm": 7.522838490297659, "learning_rate": 9.959221545061512e-06, "loss": 17.9469, "step": 3801 }, { "epoch": 0.06949750488968505, "grad_norm": 7.249327032656113, "learning_rate": 9.959183808116028e-06, "loss": 17.6304, "step": 3802 }, { "epoch": 0.06951578408613157, "grad_norm": 8.86934517787543, "learning_rate": 9.959146053789035e-06, "loss": 18.454, "step": 3803 }, { "epoch": 0.06953406328257809, "grad_norm": 8.022226081210857, "learning_rate": 9.959108282080665e-06, "loss": 18.1153, "step": 3804 }, { "epoch": 0.06955234247902463, "grad_norm": 6.8823684446573665, "learning_rate": 9.959070492991052e-06, "loss": 17.4093, "step": 3805 }, { "epoch": 0.06957062167547115, "grad_norm": 7.2076966204602, "learning_rate": 9.959032686520326e-06, "loss": 17.7794, "step": 3806 }, { "epoch": 0.06958890087191767, "grad_norm": 7.8839595532464415, "learning_rate": 9.95899486266862e-06, "loss": 18.0942, "step": 3807 }, { "epoch": 0.0696071800683642, "grad_norm": 8.653682956089938, "learning_rate": 9.95895702143607e-06, "loss": 18.2604, "step": 3808 }, { "epoch": 0.06962545926481072, "grad_norm": 9.14225694344878, "learning_rate": 9.958919162822805e-06, "loss": 18.6332, "step": 3809 }, { "epoch": 0.06964373846125724, "grad_norm": 7.246660931457984, "learning_rate": 9.958881286828956e-06, "loss": 17.7692, "step": 3810 }, { "epoch": 0.06966201765770377, "grad_norm": 7.45850903341891, "learning_rate": 9.95884339345466e-06, "loss": 18.0857, "step": 3811 }, { "epoch": 0.0696802968541503, "grad_norm": 8.270664122826734, "learning_rate": 9.95880548270005e-06, "loss": 18.2097, "step": 3812 }, { "epoch": 0.06969857605059682, "grad_norm": 7.304124010185371, "learning_rate": 9.958767554565256e-06, "loss": 17.8819, "step": 3813 }, { "epoch": 0.06971685524704334, "grad_norm": 6.074939583078227, "learning_rate": 9.958729609050412e-06, "loss": 17.3484, "step": 3814 }, { "epoch": 0.06973513444348986, "grad_norm": 7.497330253821664, "learning_rate": 9.95869164615565e-06, "loss": 18.0494, "step": 3815 }, { "epoch": 0.06975341363993638, "grad_norm": 7.511123475752274, "learning_rate": 9.958653665881106e-06, "loss": 17.6961, "step": 3816 }, { "epoch": 0.06977169283638292, "grad_norm": 7.020476709462503, "learning_rate": 9.95861566822691e-06, "loss": 17.5925, "step": 3817 }, { "epoch": 0.06978997203282944, "grad_norm": 7.613458171651699, "learning_rate": 9.958577653193195e-06, "loss": 18.1222, "step": 3818 }, { "epoch": 0.06980825122927596, "grad_norm": 6.7776175462570425, "learning_rate": 9.9585396207801e-06, "loss": 17.5001, "step": 3819 }, { "epoch": 0.06982653042572248, "grad_norm": 7.048944765465509, "learning_rate": 9.958501570987748e-06, "loss": 17.6315, "step": 3820 }, { "epoch": 0.069844809622169, "grad_norm": 6.996495382862664, "learning_rate": 9.958463503816283e-06, "loss": 17.7513, "step": 3821 }, { "epoch": 0.06986308881861554, "grad_norm": 6.207166277887507, "learning_rate": 9.958425419265831e-06, "loss": 17.2869, "step": 3822 }, { "epoch": 0.06988136801506206, "grad_norm": 7.289063728499782, "learning_rate": 9.958387317336529e-06, "loss": 18.0618, "step": 3823 }, { "epoch": 0.06989964721150858, "grad_norm": 6.313155286089765, "learning_rate": 9.95834919802851e-06, "loss": 17.2904, "step": 3824 }, { "epoch": 0.0699179264079551, "grad_norm": 7.962502631421321, "learning_rate": 9.958311061341908e-06, "loss": 17.9601, "step": 3825 }, { "epoch": 0.06993620560440163, "grad_norm": 8.332043311595505, "learning_rate": 9.958272907276855e-06, "loss": 18.1206, "step": 3826 }, { "epoch": 0.06995448480084815, "grad_norm": 7.400477058760804, "learning_rate": 9.958234735833485e-06, "loss": 17.89, "step": 3827 }, { "epoch": 0.06997276399729468, "grad_norm": 8.613183284713479, "learning_rate": 9.958196547011934e-06, "loss": 18.0946, "step": 3828 }, { "epoch": 0.0699910431937412, "grad_norm": 7.362058331686377, "learning_rate": 9.958158340812333e-06, "loss": 17.6704, "step": 3829 }, { "epoch": 0.07000932239018773, "grad_norm": 8.011238170474346, "learning_rate": 9.958120117234818e-06, "loss": 18.2923, "step": 3830 }, { "epoch": 0.07002760158663425, "grad_norm": 7.26819504568335, "learning_rate": 9.958081876279523e-06, "loss": 18.0602, "step": 3831 }, { "epoch": 0.07004588078308077, "grad_norm": 6.505793670949363, "learning_rate": 9.95804361794658e-06, "loss": 17.2733, "step": 3832 }, { "epoch": 0.0700641599795273, "grad_norm": 8.085243306574576, "learning_rate": 9.958005342236126e-06, "loss": 18.3954, "step": 3833 }, { "epoch": 0.07008243917597383, "grad_norm": 8.094394056718064, "learning_rate": 9.957967049148292e-06, "loss": 18.2679, "step": 3834 }, { "epoch": 0.07010071837242035, "grad_norm": 8.984286766048601, "learning_rate": 9.957928738683213e-06, "loss": 18.5315, "step": 3835 }, { "epoch": 0.07011899756886687, "grad_norm": 7.361178826776351, "learning_rate": 9.957890410841026e-06, "loss": 17.8741, "step": 3836 }, { "epoch": 0.0701372767653134, "grad_norm": 8.090209621958795, "learning_rate": 9.957852065621862e-06, "loss": 18.0183, "step": 3837 }, { "epoch": 0.07015555596175992, "grad_norm": 7.679469249415488, "learning_rate": 9.957813703025857e-06, "loss": 17.987, "step": 3838 }, { "epoch": 0.07017383515820645, "grad_norm": 7.349116689247304, "learning_rate": 9.957775323053146e-06, "loss": 17.8111, "step": 3839 }, { "epoch": 0.07019211435465297, "grad_norm": 9.154605941566015, "learning_rate": 9.957736925703863e-06, "loss": 18.5949, "step": 3840 }, { "epoch": 0.0702103935510995, "grad_norm": 8.187506651271368, "learning_rate": 9.95769851097814e-06, "loss": 18.2681, "step": 3841 }, { "epoch": 0.07022867274754602, "grad_norm": 5.924782325222919, "learning_rate": 9.957660078876116e-06, "loss": 17.3512, "step": 3842 }, { "epoch": 0.07024695194399254, "grad_norm": 6.55882708458667, "learning_rate": 9.957621629397921e-06, "loss": 17.4009, "step": 3843 }, { "epoch": 0.07026523114043906, "grad_norm": 7.487796551520118, "learning_rate": 9.957583162543696e-06, "loss": 17.9639, "step": 3844 }, { "epoch": 0.0702835103368856, "grad_norm": 7.275007051117932, "learning_rate": 9.95754467831357e-06, "loss": 17.5755, "step": 3845 }, { "epoch": 0.07030178953333212, "grad_norm": 12.569893684604146, "learning_rate": 9.95750617670768e-06, "loss": 17.2282, "step": 3846 }, { "epoch": 0.07032006872977864, "grad_norm": 8.240543272249862, "learning_rate": 9.957467657726163e-06, "loss": 17.7524, "step": 3847 }, { "epoch": 0.07033834792622516, "grad_norm": 7.302420800858163, "learning_rate": 9.95742912136915e-06, "loss": 17.7059, "step": 3848 }, { "epoch": 0.07035662712267168, "grad_norm": 7.840809494218581, "learning_rate": 9.95739056763678e-06, "loss": 17.8699, "step": 3849 }, { "epoch": 0.0703749063191182, "grad_norm": 7.174100674831677, "learning_rate": 9.957351996529185e-06, "loss": 17.8066, "step": 3850 }, { "epoch": 0.07039318551556474, "grad_norm": 8.467747837492595, "learning_rate": 9.9573134080465e-06, "loss": 18.4042, "step": 3851 }, { "epoch": 0.07041146471201126, "grad_norm": 5.958143703909017, "learning_rate": 9.957274802188864e-06, "loss": 17.1516, "step": 3852 }, { "epoch": 0.07042974390845778, "grad_norm": 6.857922505161306, "learning_rate": 9.95723617895641e-06, "loss": 17.5791, "step": 3853 }, { "epoch": 0.0704480231049043, "grad_norm": 8.567716149969353, "learning_rate": 9.957197538349273e-06, "loss": 18.0442, "step": 3854 }, { "epoch": 0.07046630230135083, "grad_norm": 8.980549231024167, "learning_rate": 9.957158880367589e-06, "loss": 17.886, "step": 3855 }, { "epoch": 0.07048458149779736, "grad_norm": 6.891651257083474, "learning_rate": 9.957120205011492e-06, "loss": 17.8852, "step": 3856 }, { "epoch": 0.07050286069424389, "grad_norm": 8.484493154188359, "learning_rate": 9.95708151228112e-06, "loss": 18.456, "step": 3857 }, { "epoch": 0.0705211398906904, "grad_norm": 6.898141184519592, "learning_rate": 9.957042802176606e-06, "loss": 17.7343, "step": 3858 }, { "epoch": 0.07053941908713693, "grad_norm": 6.88328401354471, "learning_rate": 9.957004074698088e-06, "loss": 17.7004, "step": 3859 }, { "epoch": 0.07055769828358345, "grad_norm": 6.211656798973414, "learning_rate": 9.956965329845701e-06, "loss": 17.2023, "step": 3860 }, { "epoch": 0.07057597748002997, "grad_norm": 8.845461155674911, "learning_rate": 9.95692656761958e-06, "loss": 18.26, "step": 3861 }, { "epoch": 0.07059425667647651, "grad_norm": 7.889961654619663, "learning_rate": 9.956887788019863e-06, "loss": 17.9673, "step": 3862 }, { "epoch": 0.07061253587292303, "grad_norm": 7.44714448615704, "learning_rate": 9.956848991046682e-06, "loss": 18.1349, "step": 3863 }, { "epoch": 0.07063081506936955, "grad_norm": 6.90674757093021, "learning_rate": 9.956810176700178e-06, "loss": 17.8154, "step": 3864 }, { "epoch": 0.07064909426581607, "grad_norm": 8.046083685464394, "learning_rate": 9.956771344980483e-06, "loss": 17.9673, "step": 3865 }, { "epoch": 0.0706673734622626, "grad_norm": 7.859387615224365, "learning_rate": 9.956732495887736e-06, "loss": 18.0598, "step": 3866 }, { "epoch": 0.07068565265870912, "grad_norm": 7.641056445194432, "learning_rate": 9.95669362942207e-06, "loss": 18.1409, "step": 3867 }, { "epoch": 0.07070393185515565, "grad_norm": 8.94055665340746, "learning_rate": 9.956654745583623e-06, "loss": 18.8027, "step": 3868 }, { "epoch": 0.07072221105160217, "grad_norm": 6.691974164414335, "learning_rate": 9.956615844372532e-06, "loss": 17.6638, "step": 3869 }, { "epoch": 0.0707404902480487, "grad_norm": 6.057754083197547, "learning_rate": 9.956576925788934e-06, "loss": 17.2145, "step": 3870 }, { "epoch": 0.07075876944449522, "grad_norm": 7.604425340976951, "learning_rate": 9.956537989832961e-06, "loss": 17.7097, "step": 3871 }, { "epoch": 0.07077704864094174, "grad_norm": 6.472158783529807, "learning_rate": 9.956499036504754e-06, "loss": 17.372, "step": 3872 }, { "epoch": 0.07079532783738827, "grad_norm": 6.465712103051698, "learning_rate": 9.956460065804449e-06, "loss": 17.5418, "step": 3873 }, { "epoch": 0.0708136070338348, "grad_norm": 6.280648814493716, "learning_rate": 9.95642107773218e-06, "loss": 17.2867, "step": 3874 }, { "epoch": 0.07083188623028132, "grad_norm": 7.0407610117133945, "learning_rate": 9.956382072288086e-06, "loss": 17.6109, "step": 3875 }, { "epoch": 0.07085016542672784, "grad_norm": 7.994480393016992, "learning_rate": 9.956343049472302e-06, "loss": 17.8335, "step": 3876 }, { "epoch": 0.07086844462317436, "grad_norm": 7.107490375781242, "learning_rate": 9.956304009284968e-06, "loss": 17.9981, "step": 3877 }, { "epoch": 0.07088672381962088, "grad_norm": 6.842196758503717, "learning_rate": 9.956264951726216e-06, "loss": 17.5187, "step": 3878 }, { "epoch": 0.07090500301606742, "grad_norm": 8.76140509406288, "learning_rate": 9.956225876796185e-06, "loss": 18.4938, "step": 3879 }, { "epoch": 0.07092328221251394, "grad_norm": 8.632549706370371, "learning_rate": 9.956186784495014e-06, "loss": 18.1769, "step": 3880 }, { "epoch": 0.07094156140896046, "grad_norm": 6.453701664390098, "learning_rate": 9.956147674822838e-06, "loss": 17.4795, "step": 3881 }, { "epoch": 0.07095984060540698, "grad_norm": 6.693906139208951, "learning_rate": 9.956108547779795e-06, "loss": 17.4784, "step": 3882 }, { "epoch": 0.0709781198018535, "grad_norm": 8.410888034663625, "learning_rate": 9.956069403366022e-06, "loss": 18.2885, "step": 3883 }, { "epoch": 0.07099639899830003, "grad_norm": 7.609992702552747, "learning_rate": 9.956030241581655e-06, "loss": 18.289, "step": 3884 }, { "epoch": 0.07101467819474656, "grad_norm": 7.361900219256501, "learning_rate": 9.955991062426831e-06, "loss": 18.0587, "step": 3885 }, { "epoch": 0.07103295739119309, "grad_norm": 7.517307551589767, "learning_rate": 9.955951865901689e-06, "loss": 17.9746, "step": 3886 }, { "epoch": 0.07105123658763961, "grad_norm": 7.414760374856745, "learning_rate": 9.955912652006368e-06, "loss": 17.9533, "step": 3887 }, { "epoch": 0.07106951578408613, "grad_norm": 6.804474401638567, "learning_rate": 9.955873420741001e-06, "loss": 17.9495, "step": 3888 }, { "epoch": 0.07108779498053265, "grad_norm": 8.028523634313913, "learning_rate": 9.955834172105728e-06, "loss": 17.9176, "step": 3889 }, { "epoch": 0.07110607417697919, "grad_norm": 7.412426346205773, "learning_rate": 9.955794906100687e-06, "loss": 17.7117, "step": 3890 }, { "epoch": 0.07112435337342571, "grad_norm": 8.429497997814615, "learning_rate": 9.955755622726013e-06, "loss": 18.1698, "step": 3891 }, { "epoch": 0.07114263256987223, "grad_norm": 7.746424875395029, "learning_rate": 9.955716321981847e-06, "loss": 18.2551, "step": 3892 }, { "epoch": 0.07116091176631875, "grad_norm": 6.949464628106947, "learning_rate": 9.955677003868326e-06, "loss": 17.7316, "step": 3893 }, { "epoch": 0.07117919096276527, "grad_norm": 6.86136952791035, "learning_rate": 9.955637668385585e-06, "loss": 17.5325, "step": 3894 }, { "epoch": 0.0711974701592118, "grad_norm": 7.210314086051575, "learning_rate": 9.955598315533767e-06, "loss": 17.4802, "step": 3895 }, { "epoch": 0.07121574935565833, "grad_norm": 8.799671632444277, "learning_rate": 9.955558945313005e-06, "loss": 18.1703, "step": 3896 }, { "epoch": 0.07123402855210485, "grad_norm": 9.348757079785866, "learning_rate": 9.955519557723438e-06, "loss": 18.8315, "step": 3897 }, { "epoch": 0.07125230774855137, "grad_norm": 7.394544857227317, "learning_rate": 9.955480152765206e-06, "loss": 17.5749, "step": 3898 }, { "epoch": 0.0712705869449979, "grad_norm": 8.004974836420036, "learning_rate": 9.955440730438448e-06, "loss": 18.1448, "step": 3899 }, { "epoch": 0.07128886614144442, "grad_norm": 6.7621017788199325, "learning_rate": 9.9554012907433e-06, "loss": 17.7479, "step": 3900 }, { "epoch": 0.07130714533789094, "grad_norm": 6.965136473977121, "learning_rate": 9.955361833679898e-06, "loss": 17.7969, "step": 3901 }, { "epoch": 0.07132542453433748, "grad_norm": 6.460864792601465, "learning_rate": 9.955322359248382e-06, "loss": 17.5655, "step": 3902 }, { "epoch": 0.071343703730784, "grad_norm": 7.078763293573598, "learning_rate": 9.955282867448893e-06, "loss": 17.7961, "step": 3903 }, { "epoch": 0.07136198292723052, "grad_norm": 7.453499886376171, "learning_rate": 9.955243358281567e-06, "loss": 17.8465, "step": 3904 }, { "epoch": 0.07138026212367704, "grad_norm": 7.344132117766187, "learning_rate": 9.955203831746544e-06, "loss": 17.7976, "step": 3905 }, { "epoch": 0.07139854132012356, "grad_norm": 6.28162514941662, "learning_rate": 9.955164287843961e-06, "loss": 17.5085, "step": 3906 }, { "epoch": 0.0714168205165701, "grad_norm": 6.301605044284223, "learning_rate": 9.955124726573957e-06, "loss": 17.2057, "step": 3907 }, { "epoch": 0.07143509971301662, "grad_norm": 8.324558992093518, "learning_rate": 9.955085147936672e-06, "loss": 18.319, "step": 3908 }, { "epoch": 0.07145337890946314, "grad_norm": 39.34073829515376, "learning_rate": 9.955045551932244e-06, "loss": 18.6678, "step": 3909 }, { "epoch": 0.07147165810590966, "grad_norm": 7.0815990079285065, "learning_rate": 9.95500593856081e-06, "loss": 17.5787, "step": 3910 }, { "epoch": 0.07148993730235619, "grad_norm": 7.758661495696111, "learning_rate": 9.954966307822509e-06, "loss": 18.1176, "step": 3911 }, { "epoch": 0.0715082164988027, "grad_norm": 5.4359752949865365, "learning_rate": 9.95492665971748e-06, "loss": 16.9614, "step": 3912 }, { "epoch": 0.07152649569524924, "grad_norm": 6.701826729506655, "learning_rate": 9.954886994245868e-06, "loss": 17.541, "step": 3913 }, { "epoch": 0.07154477489169576, "grad_norm": 7.496048174752599, "learning_rate": 9.954847311407802e-06, "loss": 17.9465, "step": 3914 }, { "epoch": 0.07156305408814229, "grad_norm": 7.645185163814274, "learning_rate": 9.954807611203429e-06, "loss": 17.8904, "step": 3915 }, { "epoch": 0.07158133328458881, "grad_norm": 8.495001972004014, "learning_rate": 9.954767893632885e-06, "loss": 18.0269, "step": 3916 }, { "epoch": 0.07159961248103533, "grad_norm": 7.298855466968175, "learning_rate": 9.95472815869631e-06, "loss": 17.8787, "step": 3917 }, { "epoch": 0.07161789167748185, "grad_norm": 8.149951520511012, "learning_rate": 9.954688406393842e-06, "loss": 18.1366, "step": 3918 }, { "epoch": 0.07163617087392839, "grad_norm": 8.855197248094138, "learning_rate": 9.95464863672562e-06, "loss": 18.1028, "step": 3919 }, { "epoch": 0.07165445007037491, "grad_norm": 8.326114062175687, "learning_rate": 9.954608849691785e-06, "loss": 18.048, "step": 3920 }, { "epoch": 0.07167272926682143, "grad_norm": 8.429519275282749, "learning_rate": 9.954569045292478e-06, "loss": 18.2035, "step": 3921 }, { "epoch": 0.07169100846326795, "grad_norm": 6.783247772055827, "learning_rate": 9.954529223527833e-06, "loss": 17.6596, "step": 3922 }, { "epoch": 0.07170928765971447, "grad_norm": 9.927533382657202, "learning_rate": 9.954489384397994e-06, "loss": 18.119, "step": 3923 }, { "epoch": 0.07172756685616101, "grad_norm": 8.204292956300954, "learning_rate": 9.954449527903101e-06, "loss": 17.7851, "step": 3924 }, { "epoch": 0.07174584605260753, "grad_norm": 7.3709617881107485, "learning_rate": 9.954409654043292e-06, "loss": 17.9231, "step": 3925 }, { "epoch": 0.07176412524905405, "grad_norm": 6.613022753483986, "learning_rate": 9.954369762818708e-06, "loss": 17.7306, "step": 3926 }, { "epoch": 0.07178240444550057, "grad_norm": 7.856914378194838, "learning_rate": 9.954329854229485e-06, "loss": 18.1396, "step": 3927 }, { "epoch": 0.0718006836419471, "grad_norm": 7.518274259323055, "learning_rate": 9.954289928275769e-06, "loss": 17.5279, "step": 3928 }, { "epoch": 0.07181896283839362, "grad_norm": 8.176998030566098, "learning_rate": 9.954249984957695e-06, "loss": 17.9249, "step": 3929 }, { "epoch": 0.07183724203484015, "grad_norm": 7.313919425219405, "learning_rate": 9.954210024275403e-06, "loss": 17.8802, "step": 3930 }, { "epoch": 0.07185552123128668, "grad_norm": 6.774606189206345, "learning_rate": 9.954170046229039e-06, "loss": 17.6312, "step": 3931 }, { "epoch": 0.0718738004277332, "grad_norm": 8.35589329756528, "learning_rate": 9.954130050818735e-06, "loss": 18.0618, "step": 3932 }, { "epoch": 0.07189207962417972, "grad_norm": 7.354747245044039, "learning_rate": 9.954090038044637e-06, "loss": 17.6745, "step": 3933 }, { "epoch": 0.07191035882062624, "grad_norm": 6.369932691161392, "learning_rate": 9.954050007906883e-06, "loss": 17.4517, "step": 3934 }, { "epoch": 0.07192863801707276, "grad_norm": 6.04383158675737, "learning_rate": 9.954009960405613e-06, "loss": 17.2224, "step": 3935 }, { "epoch": 0.0719469172135193, "grad_norm": 8.684643178179634, "learning_rate": 9.953969895540969e-06, "loss": 18.2651, "step": 3936 }, { "epoch": 0.07196519640996582, "grad_norm": 8.006094844175474, "learning_rate": 9.95392981331309e-06, "loss": 18.0621, "step": 3937 }, { "epoch": 0.07198347560641234, "grad_norm": 7.1188474842092635, "learning_rate": 9.953889713722118e-06, "loss": 17.6638, "step": 3938 }, { "epoch": 0.07200175480285886, "grad_norm": 7.686894365864788, "learning_rate": 9.95384959676819e-06, "loss": 17.7463, "step": 3939 }, { "epoch": 0.07202003399930539, "grad_norm": 7.501717431496252, "learning_rate": 9.953809462451452e-06, "loss": 17.8232, "step": 3940 }, { "epoch": 0.07203831319575192, "grad_norm": 8.094419695444945, "learning_rate": 9.95376931077204e-06, "loss": 18.1749, "step": 3941 }, { "epoch": 0.07205659239219844, "grad_norm": 7.936655585513716, "learning_rate": 9.953729141730095e-06, "loss": 17.7579, "step": 3942 }, { "epoch": 0.07207487158864496, "grad_norm": 6.066799466763743, "learning_rate": 9.95368895532576e-06, "loss": 17.4134, "step": 3943 }, { "epoch": 0.07209315078509149, "grad_norm": 7.904978332949921, "learning_rate": 9.953648751559177e-06, "loss": 17.931, "step": 3944 }, { "epoch": 0.07211142998153801, "grad_norm": 7.5199540264729325, "learning_rate": 9.953608530430483e-06, "loss": 18.0643, "step": 3945 }, { "epoch": 0.07212970917798453, "grad_norm": 7.682338263648552, "learning_rate": 9.953568291939822e-06, "loss": 17.7252, "step": 3946 }, { "epoch": 0.07214798837443107, "grad_norm": 8.975695313385652, "learning_rate": 9.953528036087334e-06, "loss": 18.3403, "step": 3947 }, { "epoch": 0.07216626757087759, "grad_norm": 7.9441040428145655, "learning_rate": 9.95348776287316e-06, "loss": 17.718, "step": 3948 }, { "epoch": 0.07218454676732411, "grad_norm": 7.4782322260295855, "learning_rate": 9.953447472297442e-06, "loss": 18.0214, "step": 3949 }, { "epoch": 0.07220282596377063, "grad_norm": 7.993625387885855, "learning_rate": 9.953407164360317e-06, "loss": 17.5924, "step": 3950 }, { "epoch": 0.07222110516021715, "grad_norm": 6.53021490621029, "learning_rate": 9.953366839061932e-06, "loss": 17.3348, "step": 3951 }, { "epoch": 0.07223938435666367, "grad_norm": 7.648185006668104, "learning_rate": 9.953326496402428e-06, "loss": 17.8196, "step": 3952 }, { "epoch": 0.07225766355311021, "grad_norm": 8.067465309558568, "learning_rate": 9.953286136381944e-06, "loss": 18.3634, "step": 3953 }, { "epoch": 0.07227594274955673, "grad_norm": 7.379918960516362, "learning_rate": 9.95324575900062e-06, "loss": 17.9421, "step": 3954 }, { "epoch": 0.07229422194600325, "grad_norm": 7.497042354897406, "learning_rate": 9.9532053642586e-06, "loss": 17.9736, "step": 3955 }, { "epoch": 0.07231250114244978, "grad_norm": 9.806995871791335, "learning_rate": 9.953164952156024e-06, "loss": 18.8548, "step": 3956 }, { "epoch": 0.0723307803388963, "grad_norm": 6.247902363816244, "learning_rate": 9.953124522693036e-06, "loss": 17.4974, "step": 3957 }, { "epoch": 0.07234905953534283, "grad_norm": 7.512323418953835, "learning_rate": 9.953084075869777e-06, "loss": 17.755, "step": 3958 }, { "epoch": 0.07236733873178935, "grad_norm": 8.35778571923837, "learning_rate": 9.953043611686387e-06, "loss": 18.5947, "step": 3959 }, { "epoch": 0.07238561792823588, "grad_norm": 7.266903566371978, "learning_rate": 9.953003130143008e-06, "loss": 17.7397, "step": 3960 }, { "epoch": 0.0724038971246824, "grad_norm": 8.868681862384477, "learning_rate": 9.952962631239785e-06, "loss": 18.2875, "step": 3961 }, { "epoch": 0.07242217632112892, "grad_norm": 7.137759581150072, "learning_rate": 9.952922114976856e-06, "loss": 17.76, "step": 3962 }, { "epoch": 0.07244045551757544, "grad_norm": 6.571967067078624, "learning_rate": 9.952881581354366e-06, "loss": 17.7174, "step": 3963 }, { "epoch": 0.07245873471402198, "grad_norm": 8.084110521015178, "learning_rate": 9.952841030372455e-06, "loss": 18.1417, "step": 3964 }, { "epoch": 0.0724770139104685, "grad_norm": 6.855205404867292, "learning_rate": 9.952800462031267e-06, "loss": 17.7353, "step": 3965 }, { "epoch": 0.07249529310691502, "grad_norm": 7.362992009751969, "learning_rate": 9.952759876330941e-06, "loss": 18.034, "step": 3966 }, { "epoch": 0.07251357230336154, "grad_norm": 7.209436599592481, "learning_rate": 9.952719273271625e-06, "loss": 17.7053, "step": 3967 }, { "epoch": 0.07253185149980806, "grad_norm": 10.08601339002671, "learning_rate": 9.952678652853457e-06, "loss": 18.1241, "step": 3968 }, { "epoch": 0.07255013069625459, "grad_norm": 7.466547884926921, "learning_rate": 9.952638015076577e-06, "loss": 18.0381, "step": 3969 }, { "epoch": 0.07256840989270112, "grad_norm": 7.990963338576961, "learning_rate": 9.952597359941132e-06, "loss": 18.2016, "step": 3970 }, { "epoch": 0.07258668908914764, "grad_norm": 7.256105294918094, "learning_rate": 9.952556687447264e-06, "loss": 17.9083, "step": 3971 }, { "epoch": 0.07260496828559417, "grad_norm": 9.089438335438436, "learning_rate": 9.952515997595113e-06, "loss": 18.5281, "step": 3972 }, { "epoch": 0.07262324748204069, "grad_norm": 8.329081459579776, "learning_rate": 9.952475290384824e-06, "loss": 18.3693, "step": 3973 }, { "epoch": 0.07264152667848721, "grad_norm": 6.519775676102489, "learning_rate": 9.95243456581654e-06, "loss": 17.5964, "step": 3974 }, { "epoch": 0.07265980587493374, "grad_norm": 8.470011468438747, "learning_rate": 9.9523938238904e-06, "loss": 18.4528, "step": 3975 }, { "epoch": 0.07267808507138027, "grad_norm": 6.759815054276921, "learning_rate": 9.952353064606553e-06, "loss": 17.523, "step": 3976 }, { "epoch": 0.07269636426782679, "grad_norm": 6.796952167284505, "learning_rate": 9.952312287965136e-06, "loss": 17.618, "step": 3977 }, { "epoch": 0.07271464346427331, "grad_norm": 6.743480792586531, "learning_rate": 9.952271493966293e-06, "loss": 17.6924, "step": 3978 }, { "epoch": 0.07273292266071983, "grad_norm": 7.624318428914813, "learning_rate": 9.95223068261017e-06, "loss": 17.9983, "step": 3979 }, { "epoch": 0.07275120185716635, "grad_norm": 8.376676897364284, "learning_rate": 9.952189853896907e-06, "loss": 18.4505, "step": 3980 }, { "epoch": 0.07276948105361289, "grad_norm": 6.885788415651327, "learning_rate": 9.95214900782665e-06, "loss": 17.576, "step": 3981 }, { "epoch": 0.07278776025005941, "grad_norm": 6.942396515459639, "learning_rate": 9.952108144399539e-06, "loss": 17.5863, "step": 3982 }, { "epoch": 0.07280603944650593, "grad_norm": 7.3150546757752855, "learning_rate": 9.952067263615719e-06, "loss": 18.1692, "step": 3983 }, { "epoch": 0.07282431864295245, "grad_norm": 7.031306967707324, "learning_rate": 9.952026365475333e-06, "loss": 17.7734, "step": 3984 }, { "epoch": 0.07284259783939898, "grad_norm": 9.289935613803255, "learning_rate": 9.951985449978524e-06, "loss": 18.4523, "step": 3985 }, { "epoch": 0.0728608770358455, "grad_norm": 7.277500424707788, "learning_rate": 9.951944517125436e-06, "loss": 17.8173, "step": 3986 }, { "epoch": 0.07287915623229203, "grad_norm": 7.301225178483241, "learning_rate": 9.951903566916213e-06, "loss": 17.8435, "step": 3987 }, { "epoch": 0.07289743542873856, "grad_norm": 8.825591628752957, "learning_rate": 9.951862599350998e-06, "loss": 18.2613, "step": 3988 }, { "epoch": 0.07291571462518508, "grad_norm": 8.208799393223494, "learning_rate": 9.951821614429934e-06, "loss": 17.8988, "step": 3989 }, { "epoch": 0.0729339938216316, "grad_norm": 8.139743030720322, "learning_rate": 9.951780612153163e-06, "loss": 18.1046, "step": 3990 }, { "epoch": 0.07295227301807812, "grad_norm": 6.784330373655954, "learning_rate": 9.951739592520833e-06, "loss": 17.2882, "step": 3991 }, { "epoch": 0.07297055221452466, "grad_norm": 7.61520459988752, "learning_rate": 9.951698555533082e-06, "loss": 18.0849, "step": 3992 }, { "epoch": 0.07298883141097118, "grad_norm": 7.072648312272576, "learning_rate": 9.951657501190061e-06, "loss": 17.6032, "step": 3993 }, { "epoch": 0.0730071106074177, "grad_norm": 7.917906172330156, "learning_rate": 9.951616429491908e-06, "loss": 18.2392, "step": 3994 }, { "epoch": 0.07302538980386422, "grad_norm": 6.925692444592428, "learning_rate": 9.95157534043877e-06, "loss": 17.6648, "step": 3995 }, { "epoch": 0.07304366900031074, "grad_norm": 7.54444893535727, "learning_rate": 9.951534234030791e-06, "loss": 17.9823, "step": 3996 }, { "epoch": 0.07306194819675726, "grad_norm": 6.612402859175103, "learning_rate": 9.951493110268111e-06, "loss": 17.8484, "step": 3997 }, { "epoch": 0.0730802273932038, "grad_norm": 8.501304712562513, "learning_rate": 9.95145196915088e-06, "loss": 18.386, "step": 3998 }, { "epoch": 0.07309850658965032, "grad_norm": 7.528057117856636, "learning_rate": 9.95141081067924e-06, "loss": 17.9842, "step": 3999 }, { "epoch": 0.07311678578609684, "grad_norm": 6.8067888312337725, "learning_rate": 9.951369634853335e-06, "loss": 17.2365, "step": 4000 }, { "epoch": 0.07313506498254337, "grad_norm": 6.125900010871575, "learning_rate": 9.951328441673307e-06, "loss": 17.3047, "step": 4001 }, { "epoch": 0.07315334417898989, "grad_norm": 6.362462663547174, "learning_rate": 9.951287231139303e-06, "loss": 17.2798, "step": 4002 }, { "epoch": 0.07317162337543641, "grad_norm": 7.327786497261743, "learning_rate": 9.951246003251466e-06, "loss": 17.7502, "step": 4003 }, { "epoch": 0.07318990257188294, "grad_norm": 7.816029605754106, "learning_rate": 9.951204758009944e-06, "loss": 18.0539, "step": 4004 }, { "epoch": 0.07320818176832947, "grad_norm": 6.597918062036601, "learning_rate": 9.951163495414878e-06, "loss": 17.4532, "step": 4005 }, { "epoch": 0.07322646096477599, "grad_norm": 6.82620713681258, "learning_rate": 9.951122215466412e-06, "loss": 17.7182, "step": 4006 }, { "epoch": 0.07324474016122251, "grad_norm": 7.169722973585692, "learning_rate": 9.951080918164693e-06, "loss": 17.589, "step": 4007 }, { "epoch": 0.07326301935766903, "grad_norm": 8.637690090782502, "learning_rate": 9.951039603509867e-06, "loss": 18.7023, "step": 4008 }, { "epoch": 0.07328129855411557, "grad_norm": 7.390510482823445, "learning_rate": 9.950998271502074e-06, "loss": 17.6186, "step": 4009 }, { "epoch": 0.07329957775056209, "grad_norm": 8.516459019691691, "learning_rate": 9.950956922141464e-06, "loss": 18.2599, "step": 4010 }, { "epoch": 0.07331785694700861, "grad_norm": 8.216007324001174, "learning_rate": 9.950915555428178e-06, "loss": 18.036, "step": 4011 }, { "epoch": 0.07333613614345513, "grad_norm": 7.413571630216674, "learning_rate": 9.950874171362364e-06, "loss": 17.9904, "step": 4012 }, { "epoch": 0.07335441533990165, "grad_norm": 6.988794377704065, "learning_rate": 9.950832769944162e-06, "loss": 17.4064, "step": 4013 }, { "epoch": 0.07337269453634818, "grad_norm": 7.667425168599029, "learning_rate": 9.950791351173726e-06, "loss": 18.2205, "step": 4014 }, { "epoch": 0.07339097373279471, "grad_norm": 8.019972368439227, "learning_rate": 9.950749915051192e-06, "loss": 18.0422, "step": 4015 }, { "epoch": 0.07340925292924123, "grad_norm": 7.834211179579535, "learning_rate": 9.950708461576711e-06, "loss": 18.1822, "step": 4016 }, { "epoch": 0.07342753212568776, "grad_norm": 6.469877667609167, "learning_rate": 9.950666990750426e-06, "loss": 17.3202, "step": 4017 }, { "epoch": 0.07344581132213428, "grad_norm": 7.870032854928659, "learning_rate": 9.950625502572484e-06, "loss": 18.0954, "step": 4018 }, { "epoch": 0.0734640905185808, "grad_norm": 7.566350062095447, "learning_rate": 9.950583997043027e-06, "loss": 17.8075, "step": 4019 }, { "epoch": 0.07348236971502732, "grad_norm": 7.780315938865126, "learning_rate": 9.950542474162204e-06, "loss": 17.9308, "step": 4020 }, { "epoch": 0.07350064891147386, "grad_norm": 7.922106515004746, "learning_rate": 9.950500933930158e-06, "loss": 17.9235, "step": 4021 }, { "epoch": 0.07351892810792038, "grad_norm": 7.45719124248031, "learning_rate": 9.950459376347039e-06, "loss": 18.1789, "step": 4022 }, { "epoch": 0.0735372073043669, "grad_norm": 7.22212766677821, "learning_rate": 9.950417801412986e-06, "loss": 17.7704, "step": 4023 }, { "epoch": 0.07355548650081342, "grad_norm": 7.677127034832653, "learning_rate": 9.95037620912815e-06, "loss": 17.8526, "step": 4024 }, { "epoch": 0.07357376569725994, "grad_norm": 8.816380996715482, "learning_rate": 9.950334599492674e-06, "loss": 18.3883, "step": 4025 }, { "epoch": 0.07359204489370648, "grad_norm": 8.390724021524845, "learning_rate": 9.950292972506706e-06, "loss": 18.2191, "step": 4026 }, { "epoch": 0.073610324090153, "grad_norm": 7.368897272812623, "learning_rate": 9.950251328170389e-06, "loss": 17.4886, "step": 4027 }, { "epoch": 0.07362860328659952, "grad_norm": 7.692438911894966, "learning_rate": 9.950209666483873e-06, "loss": 17.9969, "step": 4028 }, { "epoch": 0.07364688248304604, "grad_norm": 7.742024905837131, "learning_rate": 9.9501679874473e-06, "loss": 18.0472, "step": 4029 }, { "epoch": 0.07366516167949257, "grad_norm": 6.126921754427087, "learning_rate": 9.950126291060818e-06, "loss": 17.1164, "step": 4030 }, { "epoch": 0.07368344087593909, "grad_norm": 8.124959370701161, "learning_rate": 9.950084577324573e-06, "loss": 18.0901, "step": 4031 }, { "epoch": 0.07370172007238562, "grad_norm": 7.026397606492624, "learning_rate": 9.95004284623871e-06, "loss": 17.8502, "step": 4032 }, { "epoch": 0.07371999926883215, "grad_norm": 7.895297019900992, "learning_rate": 9.950001097803377e-06, "loss": 17.9183, "step": 4033 }, { "epoch": 0.07373827846527867, "grad_norm": 6.902516650119397, "learning_rate": 9.94995933201872e-06, "loss": 17.728, "step": 4034 }, { "epoch": 0.07375655766172519, "grad_norm": 6.500875070735881, "learning_rate": 9.949917548884886e-06, "loss": 17.5944, "step": 4035 }, { "epoch": 0.07377483685817171, "grad_norm": 7.61340253150107, "learning_rate": 9.94987574840202e-06, "loss": 17.8991, "step": 4036 }, { "epoch": 0.07379311605461823, "grad_norm": 7.249979086155434, "learning_rate": 9.949833930570267e-06, "loss": 17.8095, "step": 4037 }, { "epoch": 0.07381139525106477, "grad_norm": 6.905778959602601, "learning_rate": 9.949792095389778e-06, "loss": 17.5323, "step": 4038 }, { "epoch": 0.07382967444751129, "grad_norm": 7.230588307395547, "learning_rate": 9.949750242860696e-06, "loss": 17.9275, "step": 4039 }, { "epoch": 0.07384795364395781, "grad_norm": 8.082270819590986, "learning_rate": 9.94970837298317e-06, "loss": 18.1718, "step": 4040 }, { "epoch": 0.07386623284040433, "grad_norm": 7.657347363032943, "learning_rate": 9.949666485757345e-06, "loss": 17.7518, "step": 4041 }, { "epoch": 0.07388451203685086, "grad_norm": 6.796068906671142, "learning_rate": 9.949624581183369e-06, "loss": 17.4804, "step": 4042 }, { "epoch": 0.07390279123329739, "grad_norm": 7.122222622706773, "learning_rate": 9.949582659261387e-06, "loss": 17.6666, "step": 4043 }, { "epoch": 0.07392107042974391, "grad_norm": 8.144535069668137, "learning_rate": 9.949540719991549e-06, "loss": 17.8327, "step": 4044 }, { "epoch": 0.07393934962619043, "grad_norm": 7.21638860677488, "learning_rate": 9.949498763374e-06, "loss": 17.7529, "step": 4045 }, { "epoch": 0.07395762882263696, "grad_norm": 6.333474243227332, "learning_rate": 9.949456789408887e-06, "loss": 17.2955, "step": 4046 }, { "epoch": 0.07397590801908348, "grad_norm": 6.911443461586524, "learning_rate": 9.949414798096357e-06, "loss": 17.5879, "step": 4047 }, { "epoch": 0.07399418721553, "grad_norm": 6.792686640770893, "learning_rate": 9.949372789436559e-06, "loss": 17.7279, "step": 4048 }, { "epoch": 0.07401246641197654, "grad_norm": 7.241880275905046, "learning_rate": 9.949330763429637e-06, "loss": 18.0251, "step": 4049 }, { "epoch": 0.07403074560842306, "grad_norm": 8.604560593156664, "learning_rate": 9.949288720075741e-06, "loss": 18.1001, "step": 4050 }, { "epoch": 0.07404902480486958, "grad_norm": 6.813629230605672, "learning_rate": 9.949246659375017e-06, "loss": 17.4705, "step": 4051 }, { "epoch": 0.0740673040013161, "grad_norm": 8.35377728722315, "learning_rate": 9.949204581327615e-06, "loss": 18.4, "step": 4052 }, { "epoch": 0.07408558319776262, "grad_norm": 7.689640765761798, "learning_rate": 9.949162485933677e-06, "loss": 18.1278, "step": 4053 }, { "epoch": 0.07410386239420914, "grad_norm": 6.838277020464443, "learning_rate": 9.949120373193354e-06, "loss": 17.4119, "step": 4054 }, { "epoch": 0.07412214159065568, "grad_norm": 6.290167469321867, "learning_rate": 9.949078243106797e-06, "loss": 17.1962, "step": 4055 }, { "epoch": 0.0741404207871022, "grad_norm": 7.008453919533337, "learning_rate": 9.949036095674148e-06, "loss": 17.5201, "step": 4056 }, { "epoch": 0.07415869998354872, "grad_norm": 7.558657647542814, "learning_rate": 9.948993930895558e-06, "loss": 17.9684, "step": 4057 }, { "epoch": 0.07417697917999524, "grad_norm": 7.365638752495691, "learning_rate": 9.948951748771174e-06, "loss": 17.7711, "step": 4058 }, { "epoch": 0.07419525837644177, "grad_norm": 8.646260946036872, "learning_rate": 9.948909549301143e-06, "loss": 18.1282, "step": 4059 }, { "epoch": 0.0742135375728883, "grad_norm": 8.438851141537185, "learning_rate": 9.948867332485613e-06, "loss": 18.2028, "step": 4060 }, { "epoch": 0.07423181676933482, "grad_norm": 8.511358088612571, "learning_rate": 9.948825098324732e-06, "loss": 18.4583, "step": 4061 }, { "epoch": 0.07425009596578135, "grad_norm": 6.702850206941317, "learning_rate": 9.948782846818649e-06, "loss": 17.29, "step": 4062 }, { "epoch": 0.07426837516222787, "grad_norm": 9.318593419797512, "learning_rate": 9.948740577967512e-06, "loss": 18.9032, "step": 4063 }, { "epoch": 0.07428665435867439, "grad_norm": 8.15920685077243, "learning_rate": 9.948698291771467e-06, "loss": 17.9272, "step": 4064 }, { "epoch": 0.07430493355512091, "grad_norm": 7.552555566038091, "learning_rate": 9.948655988230667e-06, "loss": 17.9185, "step": 4065 }, { "epoch": 0.07432321275156745, "grad_norm": 7.421642743888335, "learning_rate": 9.948613667345255e-06, "loss": 17.9962, "step": 4066 }, { "epoch": 0.07434149194801397, "grad_norm": 7.589915691254926, "learning_rate": 9.948571329115382e-06, "loss": 17.811, "step": 4067 }, { "epoch": 0.07435977114446049, "grad_norm": 6.773882338508222, "learning_rate": 9.948528973541195e-06, "loss": 17.598, "step": 4068 }, { "epoch": 0.07437805034090701, "grad_norm": 7.744106412655492, "learning_rate": 9.948486600622845e-06, "loss": 17.9758, "step": 4069 }, { "epoch": 0.07439632953735353, "grad_norm": 6.691671558962903, "learning_rate": 9.948444210360478e-06, "loss": 17.4987, "step": 4070 }, { "epoch": 0.07441460873380006, "grad_norm": 7.116877447696861, "learning_rate": 9.948401802754245e-06, "loss": 17.7262, "step": 4071 }, { "epoch": 0.07443288793024659, "grad_norm": 6.537008838427648, "learning_rate": 9.948359377804291e-06, "loss": 17.6175, "step": 4072 }, { "epoch": 0.07445116712669311, "grad_norm": 9.792917696642098, "learning_rate": 9.948316935510766e-06, "loss": 18.7539, "step": 4073 }, { "epoch": 0.07446944632313963, "grad_norm": 7.123510681040195, "learning_rate": 9.948274475873823e-06, "loss": 17.885, "step": 4074 }, { "epoch": 0.07448772551958616, "grad_norm": 7.761363184437969, "learning_rate": 9.948231998893605e-06, "loss": 18.178, "step": 4075 }, { "epoch": 0.07450600471603268, "grad_norm": 7.701810438309446, "learning_rate": 9.948189504570263e-06, "loss": 18.1316, "step": 4076 }, { "epoch": 0.07452428391247921, "grad_norm": 8.334436625697984, "learning_rate": 9.948146992903947e-06, "loss": 17.8821, "step": 4077 }, { "epoch": 0.07454256310892574, "grad_norm": 7.408338899300994, "learning_rate": 9.948104463894805e-06, "loss": 17.8913, "step": 4078 }, { "epoch": 0.07456084230537226, "grad_norm": 6.739679926354867, "learning_rate": 9.948061917542986e-06, "loss": 17.7448, "step": 4079 }, { "epoch": 0.07457912150181878, "grad_norm": 7.439166133164964, "learning_rate": 9.94801935384864e-06, "loss": 18.0901, "step": 4080 }, { "epoch": 0.0745974006982653, "grad_norm": 7.322434827980138, "learning_rate": 9.947976772811915e-06, "loss": 17.8107, "step": 4081 }, { "epoch": 0.07461567989471182, "grad_norm": 7.209222093140329, "learning_rate": 9.94793417443296e-06, "loss": 17.9077, "step": 4082 }, { "epoch": 0.07463395909115836, "grad_norm": 7.28439817785033, "learning_rate": 9.947891558711926e-06, "loss": 17.854, "step": 4083 }, { "epoch": 0.07465223828760488, "grad_norm": 6.633480726270058, "learning_rate": 9.947848925648961e-06, "loss": 17.5347, "step": 4084 }, { "epoch": 0.0746705174840514, "grad_norm": 8.384485505266616, "learning_rate": 9.947806275244216e-06, "loss": 18.024, "step": 4085 }, { "epoch": 0.07468879668049792, "grad_norm": 6.944517935602008, "learning_rate": 9.947763607497839e-06, "loss": 17.8275, "step": 4086 }, { "epoch": 0.07470707587694445, "grad_norm": 7.061560685453435, "learning_rate": 9.94772092240998e-06, "loss": 17.4181, "step": 4087 }, { "epoch": 0.07472535507339097, "grad_norm": 9.971019832050642, "learning_rate": 9.947678219980788e-06, "loss": 19.1205, "step": 4088 }, { "epoch": 0.0747436342698375, "grad_norm": 6.843703280412826, "learning_rate": 9.947635500210413e-06, "loss": 17.4305, "step": 4089 }, { "epoch": 0.07476191346628402, "grad_norm": 6.489921106555448, "learning_rate": 9.947592763099005e-06, "loss": 17.2924, "step": 4090 }, { "epoch": 0.07478019266273055, "grad_norm": 6.624219617143493, "learning_rate": 9.947550008646713e-06, "loss": 17.3873, "step": 4091 }, { "epoch": 0.07479847185917707, "grad_norm": 7.034030188330044, "learning_rate": 9.947507236853689e-06, "loss": 17.6899, "step": 4092 }, { "epoch": 0.07481675105562359, "grad_norm": 8.931076521754736, "learning_rate": 9.947464447720082e-06, "loss": 18.4088, "step": 4093 }, { "epoch": 0.07483503025207013, "grad_norm": 7.8038980993245515, "learning_rate": 9.947421641246038e-06, "loss": 18.0855, "step": 4094 }, { "epoch": 0.07485330944851665, "grad_norm": 7.184042813121832, "learning_rate": 9.947378817431714e-06, "loss": 17.754, "step": 4095 }, { "epoch": 0.07487158864496317, "grad_norm": 6.9364919302493835, "learning_rate": 9.947335976277256e-06, "loss": 17.6305, "step": 4096 }, { "epoch": 0.07488986784140969, "grad_norm": 7.332355591307151, "learning_rate": 9.947293117782813e-06, "loss": 17.736, "step": 4097 }, { "epoch": 0.07490814703785621, "grad_norm": 6.37794444152234, "learning_rate": 9.947250241948538e-06, "loss": 17.3367, "step": 4098 }, { "epoch": 0.07492642623430273, "grad_norm": 7.422075968050891, "learning_rate": 9.94720734877458e-06, "loss": 17.7243, "step": 4099 }, { "epoch": 0.07494470543074927, "grad_norm": 6.284693446169713, "learning_rate": 9.94716443826109e-06, "loss": 17.6514, "step": 4100 }, { "epoch": 0.07496298462719579, "grad_norm": 6.969561100740545, "learning_rate": 9.947121510408218e-06, "loss": 17.3915, "step": 4101 }, { "epoch": 0.07498126382364231, "grad_norm": 7.057265356575579, "learning_rate": 9.947078565216112e-06, "loss": 17.8751, "step": 4102 }, { "epoch": 0.07499954302008884, "grad_norm": 8.521052023661452, "learning_rate": 9.947035602684929e-06, "loss": 18.1689, "step": 4103 }, { "epoch": 0.07501782221653536, "grad_norm": 8.966329658182065, "learning_rate": 9.946992622814813e-06, "loss": 18.0758, "step": 4104 }, { "epoch": 0.07503610141298188, "grad_norm": 6.917196134949586, "learning_rate": 9.946949625605918e-06, "loss": 17.7005, "step": 4105 }, { "epoch": 0.07505438060942841, "grad_norm": 7.431754061761407, "learning_rate": 9.946906611058394e-06, "loss": 17.5164, "step": 4106 }, { "epoch": 0.07507265980587494, "grad_norm": 7.824983042402745, "learning_rate": 9.94686357917239e-06, "loss": 18.0068, "step": 4107 }, { "epoch": 0.07509093900232146, "grad_norm": 6.246252175853513, "learning_rate": 9.94682052994806e-06, "loss": 17.4046, "step": 4108 }, { "epoch": 0.07510921819876798, "grad_norm": 8.081686411815316, "learning_rate": 9.946777463385552e-06, "loss": 18.3135, "step": 4109 }, { "epoch": 0.0751274973952145, "grad_norm": 7.337183123648379, "learning_rate": 9.94673437948502e-06, "loss": 17.6567, "step": 4110 }, { "epoch": 0.07514577659166104, "grad_norm": 7.780530171818472, "learning_rate": 9.946691278246611e-06, "loss": 17.9705, "step": 4111 }, { "epoch": 0.07516405578810756, "grad_norm": 6.994340854220237, "learning_rate": 9.94664815967048e-06, "loss": 17.965, "step": 4112 }, { "epoch": 0.07518233498455408, "grad_norm": 9.142171164436062, "learning_rate": 9.946605023756776e-06, "loss": 18.7063, "step": 4113 }, { "epoch": 0.0752006141810006, "grad_norm": 8.526814159440248, "learning_rate": 9.94656187050565e-06, "loss": 18.3471, "step": 4114 }, { "epoch": 0.07521889337744712, "grad_norm": 8.498938940911465, "learning_rate": 9.946518699917254e-06, "loss": 18.1441, "step": 4115 }, { "epoch": 0.07523717257389365, "grad_norm": 8.097700734472312, "learning_rate": 9.94647551199174e-06, "loss": 18.1501, "step": 4116 }, { "epoch": 0.07525545177034018, "grad_norm": 7.225025441865226, "learning_rate": 9.94643230672926e-06, "loss": 17.9419, "step": 4117 }, { "epoch": 0.0752737309667867, "grad_norm": 6.190358687026648, "learning_rate": 9.946389084129962e-06, "loss": 17.3257, "step": 4118 }, { "epoch": 0.07529201016323323, "grad_norm": 7.727382977574147, "learning_rate": 9.946345844194e-06, "loss": 18.0806, "step": 4119 }, { "epoch": 0.07531028935967975, "grad_norm": 8.843536075563375, "learning_rate": 9.946302586921523e-06, "loss": 18.3376, "step": 4120 }, { "epoch": 0.07532856855612627, "grad_norm": 7.095710743979115, "learning_rate": 9.946259312312687e-06, "loss": 17.499, "step": 4121 }, { "epoch": 0.07534684775257279, "grad_norm": 9.882176262423766, "learning_rate": 9.946216020367641e-06, "loss": 18.5481, "step": 4122 }, { "epoch": 0.07536512694901933, "grad_norm": 6.449542517877799, "learning_rate": 9.946172711086538e-06, "loss": 17.4244, "step": 4123 }, { "epoch": 0.07538340614546585, "grad_norm": 6.409241883041255, "learning_rate": 9.946129384469526e-06, "loss": 17.2406, "step": 4124 }, { "epoch": 0.07540168534191237, "grad_norm": 7.892889481909539, "learning_rate": 9.946086040516762e-06, "loss": 18.6329, "step": 4125 }, { "epoch": 0.07541996453835889, "grad_norm": 7.326463313717324, "learning_rate": 9.946042679228394e-06, "loss": 17.5176, "step": 4126 }, { "epoch": 0.07543824373480541, "grad_norm": 7.5983136570427785, "learning_rate": 9.945999300604578e-06, "loss": 18.0372, "step": 4127 }, { "epoch": 0.07545652293125195, "grad_norm": 8.566667387731414, "learning_rate": 9.945955904645461e-06, "loss": 18.5739, "step": 4128 }, { "epoch": 0.07547480212769847, "grad_norm": 6.308984762709798, "learning_rate": 9.945912491351199e-06, "loss": 17.4579, "step": 4129 }, { "epoch": 0.07549308132414499, "grad_norm": 7.636671245026625, "learning_rate": 9.945869060721945e-06, "loss": 17.8382, "step": 4130 }, { "epoch": 0.07551136052059151, "grad_norm": 7.152409547096006, "learning_rate": 9.945825612757846e-06, "loss": 17.6699, "step": 4131 }, { "epoch": 0.07552963971703804, "grad_norm": 6.251052994274618, "learning_rate": 9.945782147459057e-06, "loss": 17.6965, "step": 4132 }, { "epoch": 0.07554791891348456, "grad_norm": 6.497513929491633, "learning_rate": 9.945738664825734e-06, "loss": 17.6654, "step": 4133 }, { "epoch": 0.0755661981099311, "grad_norm": 7.20433608214677, "learning_rate": 9.945695164858024e-06, "loss": 17.6154, "step": 4134 }, { "epoch": 0.07558447730637761, "grad_norm": 8.678988533293408, "learning_rate": 9.945651647556083e-06, "loss": 18.2711, "step": 4135 }, { "epoch": 0.07560275650282414, "grad_norm": 7.675365740961694, "learning_rate": 9.94560811292006e-06, "loss": 17.7857, "step": 4136 }, { "epoch": 0.07562103569927066, "grad_norm": 6.623604759756009, "learning_rate": 9.945564560950112e-06, "loss": 17.5991, "step": 4137 }, { "epoch": 0.07563931489571718, "grad_norm": 6.792192435163245, "learning_rate": 9.945520991646387e-06, "loss": 17.7569, "step": 4138 }, { "epoch": 0.0756575940921637, "grad_norm": 6.2358037854952535, "learning_rate": 9.945477405009041e-06, "loss": 17.4349, "step": 4139 }, { "epoch": 0.07567587328861024, "grad_norm": 6.7391726750467225, "learning_rate": 9.945433801038226e-06, "loss": 17.6439, "step": 4140 }, { "epoch": 0.07569415248505676, "grad_norm": 6.574972610136312, "learning_rate": 9.945390179734095e-06, "loss": 17.4439, "step": 4141 }, { "epoch": 0.07571243168150328, "grad_norm": 6.728158734735512, "learning_rate": 9.9453465410968e-06, "loss": 17.617, "step": 4142 }, { "epoch": 0.0757307108779498, "grad_norm": 7.279794345302788, "learning_rate": 9.945302885126496e-06, "loss": 18.1515, "step": 4143 }, { "epoch": 0.07574899007439632, "grad_norm": 7.039529505380779, "learning_rate": 9.945259211823332e-06, "loss": 17.6819, "step": 4144 }, { "epoch": 0.07576726927084286, "grad_norm": 6.995006558034378, "learning_rate": 9.945215521187465e-06, "loss": 17.8893, "step": 4145 }, { "epoch": 0.07578554846728938, "grad_norm": 7.013601369894375, "learning_rate": 9.945171813219048e-06, "loss": 17.7406, "step": 4146 }, { "epoch": 0.0758038276637359, "grad_norm": 7.644985109226018, "learning_rate": 9.945128087918233e-06, "loss": 17.93, "step": 4147 }, { "epoch": 0.07582210686018243, "grad_norm": 8.513745268468803, "learning_rate": 9.945084345285172e-06, "loss": 18.1143, "step": 4148 }, { "epoch": 0.07584038605662895, "grad_norm": 8.338563306688147, "learning_rate": 9.945040585320019e-06, "loss": 18.039, "step": 4149 }, { "epoch": 0.07585866525307547, "grad_norm": 8.422151833158328, "learning_rate": 9.944996808022926e-06, "loss": 17.9575, "step": 4150 }, { "epoch": 0.075876944449522, "grad_norm": 7.36414342599897, "learning_rate": 9.944953013394053e-06, "loss": 17.6962, "step": 4151 }, { "epoch": 0.07589522364596853, "grad_norm": 6.611161073638273, "learning_rate": 9.944909201433545e-06, "loss": 17.4743, "step": 4152 }, { "epoch": 0.07591350284241505, "grad_norm": 8.11113165044071, "learning_rate": 9.944865372141562e-06, "loss": 17.8998, "step": 4153 }, { "epoch": 0.07593178203886157, "grad_norm": 8.147540883815966, "learning_rate": 9.94482152551825e-06, "loss": 17.7785, "step": 4154 }, { "epoch": 0.07595006123530809, "grad_norm": 6.037791007125641, "learning_rate": 9.944777661563774e-06, "loss": 17.1556, "step": 4155 }, { "epoch": 0.07596834043175461, "grad_norm": 8.005885219212052, "learning_rate": 9.944733780278277e-06, "loss": 18.1231, "step": 4156 }, { "epoch": 0.07598661962820115, "grad_norm": 7.258942344087547, "learning_rate": 9.94468988166192e-06, "loss": 17.6645, "step": 4157 }, { "epoch": 0.07600489882464767, "grad_norm": 6.8830099394215285, "learning_rate": 9.944645965714851e-06, "loss": 17.6559, "step": 4158 }, { "epoch": 0.07602317802109419, "grad_norm": 6.907603205447108, "learning_rate": 9.94460203243723e-06, "loss": 17.5304, "step": 4159 }, { "epoch": 0.07604145721754071, "grad_norm": 7.609951382296154, "learning_rate": 9.944558081829208e-06, "loss": 17.7897, "step": 4160 }, { "epoch": 0.07605973641398724, "grad_norm": 8.234379233176638, "learning_rate": 9.944514113890938e-06, "loss": 18.4789, "step": 4161 }, { "epoch": 0.07607801561043377, "grad_norm": 8.28646045976863, "learning_rate": 9.944470128622574e-06, "loss": 17.8717, "step": 4162 }, { "epoch": 0.0760962948068803, "grad_norm": 7.497066969234497, "learning_rate": 9.944426126024271e-06, "loss": 17.9437, "step": 4163 }, { "epoch": 0.07611457400332682, "grad_norm": 7.739957927204389, "learning_rate": 9.944382106096186e-06, "loss": 17.8309, "step": 4164 }, { "epoch": 0.07613285319977334, "grad_norm": 8.3163273818476, "learning_rate": 9.944338068838469e-06, "loss": 18.029, "step": 4165 }, { "epoch": 0.07615113239621986, "grad_norm": 7.273911546813022, "learning_rate": 9.944294014251276e-06, "loss": 17.7673, "step": 4166 }, { "epoch": 0.07616941159266638, "grad_norm": 7.337268981521612, "learning_rate": 9.944249942334763e-06, "loss": 17.8419, "step": 4167 }, { "epoch": 0.07618769078911292, "grad_norm": 8.363235672027164, "learning_rate": 9.944205853089081e-06, "loss": 18.3541, "step": 4168 }, { "epoch": 0.07620596998555944, "grad_norm": 8.452200252193368, "learning_rate": 9.944161746514387e-06, "loss": 17.9309, "step": 4169 }, { "epoch": 0.07622424918200596, "grad_norm": 8.218646460160082, "learning_rate": 9.944117622610836e-06, "loss": 18.2633, "step": 4170 }, { "epoch": 0.07624252837845248, "grad_norm": 6.357008423240061, "learning_rate": 9.944073481378581e-06, "loss": 17.5283, "step": 4171 }, { "epoch": 0.076260807574899, "grad_norm": 7.7439428807246715, "learning_rate": 9.94402932281778e-06, "loss": 17.9626, "step": 4172 }, { "epoch": 0.07627908677134553, "grad_norm": 7.774958259158812, "learning_rate": 9.94398514692858e-06, "loss": 18.0529, "step": 4173 }, { "epoch": 0.07629736596779206, "grad_norm": 7.238700095656024, "learning_rate": 9.943940953711147e-06, "loss": 17.8406, "step": 4174 }, { "epoch": 0.07631564516423858, "grad_norm": 6.849452434091139, "learning_rate": 9.943896743165626e-06, "loss": 17.5181, "step": 4175 }, { "epoch": 0.0763339243606851, "grad_norm": 6.757854383035227, "learning_rate": 9.943852515292177e-06, "loss": 17.6835, "step": 4176 }, { "epoch": 0.07635220355713163, "grad_norm": 7.455393760134097, "learning_rate": 9.943808270090953e-06, "loss": 17.6921, "step": 4177 }, { "epoch": 0.07637048275357815, "grad_norm": 5.992826452646012, "learning_rate": 9.943764007562113e-06, "loss": 17.6179, "step": 4178 }, { "epoch": 0.07638876195002468, "grad_norm": 6.886465080017079, "learning_rate": 9.943719727705807e-06, "loss": 17.6493, "step": 4179 }, { "epoch": 0.0764070411464712, "grad_norm": 8.684834787939241, "learning_rate": 9.943675430522191e-06, "loss": 18.7955, "step": 4180 }, { "epoch": 0.07642532034291773, "grad_norm": 7.323220417378751, "learning_rate": 9.943631116011424e-06, "loss": 17.9174, "step": 4181 }, { "epoch": 0.07644359953936425, "grad_norm": 10.592096380410188, "learning_rate": 9.943586784173657e-06, "loss": 18.4581, "step": 4182 }, { "epoch": 0.07646187873581077, "grad_norm": 7.469103899829183, "learning_rate": 9.94354243500905e-06, "loss": 17.918, "step": 4183 }, { "epoch": 0.07648015793225729, "grad_norm": 8.429324116324565, "learning_rate": 9.943498068517754e-06, "loss": 18.197, "step": 4184 }, { "epoch": 0.07649843712870383, "grad_norm": 7.630150917105338, "learning_rate": 9.943453684699927e-06, "loss": 18.0674, "step": 4185 }, { "epoch": 0.07651671632515035, "grad_norm": 7.5848018531771855, "learning_rate": 9.943409283555722e-06, "loss": 17.9029, "step": 4186 }, { "epoch": 0.07653499552159687, "grad_norm": 6.024753089526956, "learning_rate": 9.943364865085298e-06, "loss": 17.2247, "step": 4187 }, { "epoch": 0.0765532747180434, "grad_norm": 7.86967665440768, "learning_rate": 9.943320429288808e-06, "loss": 17.8321, "step": 4188 }, { "epoch": 0.07657155391448991, "grad_norm": 6.302985308683977, "learning_rate": 9.943275976166412e-06, "loss": 17.1027, "step": 4189 }, { "epoch": 0.07658983311093644, "grad_norm": 6.833109397879678, "learning_rate": 9.943231505718258e-06, "loss": 17.6025, "step": 4190 }, { "epoch": 0.07660811230738297, "grad_norm": 8.365916398266814, "learning_rate": 9.94318701794451e-06, "loss": 18.2189, "step": 4191 }, { "epoch": 0.0766263915038295, "grad_norm": 8.477496443844009, "learning_rate": 9.943142512845318e-06, "loss": 18.3351, "step": 4192 }, { "epoch": 0.07664467070027602, "grad_norm": 6.698769837563342, "learning_rate": 9.943097990420843e-06, "loss": 17.3659, "step": 4193 }, { "epoch": 0.07666294989672254, "grad_norm": 7.8840085628076935, "learning_rate": 9.943053450671236e-06, "loss": 18.0192, "step": 4194 }, { "epoch": 0.07668122909316906, "grad_norm": 6.602380845223933, "learning_rate": 9.943008893596657e-06, "loss": 17.426, "step": 4195 }, { "epoch": 0.0766995082896156, "grad_norm": 6.888029864878589, "learning_rate": 9.94296431919726e-06, "loss": 17.6183, "step": 4196 }, { "epoch": 0.07671778748606212, "grad_norm": 8.251367427605121, "learning_rate": 9.942919727473204e-06, "loss": 18.3592, "step": 4197 }, { "epoch": 0.07673606668250864, "grad_norm": 7.964296308652757, "learning_rate": 9.942875118424641e-06, "loss": 18.2928, "step": 4198 }, { "epoch": 0.07675434587895516, "grad_norm": 8.075699505373022, "learning_rate": 9.94283049205173e-06, "loss": 18.2313, "step": 4199 }, { "epoch": 0.07677262507540168, "grad_norm": 7.184827022794858, "learning_rate": 9.942785848354628e-06, "loss": 17.7186, "step": 4200 }, { "epoch": 0.0767909042718482, "grad_norm": 7.626331857415009, "learning_rate": 9.94274118733349e-06, "loss": 17.826, "step": 4201 }, { "epoch": 0.07680918346829474, "grad_norm": 6.239000698427575, "learning_rate": 9.942696508988472e-06, "loss": 17.4026, "step": 4202 }, { "epoch": 0.07682746266474126, "grad_norm": 6.543866858237177, "learning_rate": 9.942651813319733e-06, "loss": 17.5936, "step": 4203 }, { "epoch": 0.07684574186118778, "grad_norm": 8.238881612293955, "learning_rate": 9.942607100327428e-06, "loss": 18.183, "step": 4204 }, { "epoch": 0.0768640210576343, "grad_norm": 7.722407507706026, "learning_rate": 9.942562370011714e-06, "loss": 18.4659, "step": 4205 }, { "epoch": 0.07688230025408083, "grad_norm": 7.565818940073065, "learning_rate": 9.942517622372749e-06, "loss": 17.9724, "step": 4206 }, { "epoch": 0.07690057945052735, "grad_norm": 7.277322276406721, "learning_rate": 9.942472857410687e-06, "loss": 17.6006, "step": 4207 }, { "epoch": 0.07691885864697388, "grad_norm": 6.97125103493985, "learning_rate": 9.942428075125688e-06, "loss": 17.4252, "step": 4208 }, { "epoch": 0.0769371378434204, "grad_norm": 8.40918433822153, "learning_rate": 9.942383275517905e-06, "loss": 18.2623, "step": 4209 }, { "epoch": 0.07695541703986693, "grad_norm": 7.667828948326944, "learning_rate": 9.942338458587499e-06, "loss": 17.804, "step": 4210 }, { "epoch": 0.07697369623631345, "grad_norm": 8.03912646465718, "learning_rate": 9.942293624334627e-06, "loss": 18.2884, "step": 4211 }, { "epoch": 0.07699197543275997, "grad_norm": 7.589292928257367, "learning_rate": 9.942248772759443e-06, "loss": 17.6766, "step": 4212 }, { "epoch": 0.0770102546292065, "grad_norm": 7.67375910134019, "learning_rate": 9.942203903862105e-06, "loss": 17.9565, "step": 4213 }, { "epoch": 0.07702853382565303, "grad_norm": 8.881236727814375, "learning_rate": 9.942159017642772e-06, "loss": 18.1565, "step": 4214 }, { "epoch": 0.07704681302209955, "grad_norm": 7.164802997735672, "learning_rate": 9.9421141141016e-06, "loss": 17.4539, "step": 4215 }, { "epoch": 0.07706509221854607, "grad_norm": 7.440014489282296, "learning_rate": 9.942069193238749e-06, "loss": 17.8582, "step": 4216 }, { "epoch": 0.0770833714149926, "grad_norm": 7.292631624423447, "learning_rate": 9.942024255054373e-06, "loss": 17.6257, "step": 4217 }, { "epoch": 0.07710165061143912, "grad_norm": 7.431778358488795, "learning_rate": 9.941979299548631e-06, "loss": 17.9583, "step": 4218 }, { "epoch": 0.07711992980788565, "grad_norm": 8.686717295254155, "learning_rate": 9.94193432672168e-06, "loss": 18.0693, "step": 4219 }, { "epoch": 0.07713820900433217, "grad_norm": 7.077506617393422, "learning_rate": 9.941889336573678e-06, "loss": 17.575, "step": 4220 }, { "epoch": 0.0771564882007787, "grad_norm": 7.241133479233001, "learning_rate": 9.941844329104784e-06, "loss": 17.4906, "step": 4221 }, { "epoch": 0.07717476739722522, "grad_norm": 8.624029205933166, "learning_rate": 9.941799304315152e-06, "loss": 18.6572, "step": 4222 }, { "epoch": 0.07719304659367174, "grad_norm": 6.033995693434952, "learning_rate": 9.941754262204945e-06, "loss": 17.4807, "step": 4223 }, { "epoch": 0.07721132579011826, "grad_norm": 6.659671442220002, "learning_rate": 9.941709202774317e-06, "loss": 17.5266, "step": 4224 }, { "epoch": 0.0772296049865648, "grad_norm": 7.65291781378316, "learning_rate": 9.941664126023427e-06, "loss": 17.9475, "step": 4225 }, { "epoch": 0.07724788418301132, "grad_norm": 7.73272908049448, "learning_rate": 9.941619031952433e-06, "loss": 18.2557, "step": 4226 }, { "epoch": 0.07726616337945784, "grad_norm": 7.436490386916161, "learning_rate": 9.941573920561493e-06, "loss": 17.7364, "step": 4227 }, { "epoch": 0.07728444257590436, "grad_norm": 7.1492946285383985, "learning_rate": 9.941528791850765e-06, "loss": 17.4325, "step": 4228 }, { "epoch": 0.07730272177235088, "grad_norm": 7.145356946317946, "learning_rate": 9.941483645820406e-06, "loss": 17.8871, "step": 4229 }, { "epoch": 0.07732100096879742, "grad_norm": 7.033499107558293, "learning_rate": 9.941438482470578e-06, "loss": 17.6935, "step": 4230 }, { "epoch": 0.07733928016524394, "grad_norm": 9.827010628938945, "learning_rate": 9.941393301801436e-06, "loss": 18.3654, "step": 4231 }, { "epoch": 0.07735755936169046, "grad_norm": 7.0975941131186735, "learning_rate": 9.94134810381314e-06, "loss": 18.0554, "step": 4232 }, { "epoch": 0.07737583855813698, "grad_norm": 5.92454778215829, "learning_rate": 9.941302888505848e-06, "loss": 17.1777, "step": 4233 }, { "epoch": 0.0773941177545835, "grad_norm": 6.959103247304966, "learning_rate": 9.941257655879717e-06, "loss": 17.8269, "step": 4234 }, { "epoch": 0.07741239695103003, "grad_norm": 7.022073491872154, "learning_rate": 9.941212405934907e-06, "loss": 17.5797, "step": 4235 }, { "epoch": 0.07743067614747656, "grad_norm": 7.1063690036947715, "learning_rate": 9.941167138671577e-06, "loss": 17.7828, "step": 4236 }, { "epoch": 0.07744895534392308, "grad_norm": 7.954584175919247, "learning_rate": 9.941121854089883e-06, "loss": 17.995, "step": 4237 }, { "epoch": 0.0774672345403696, "grad_norm": 6.729010062244928, "learning_rate": 9.941076552189987e-06, "loss": 17.4944, "step": 4238 }, { "epoch": 0.07748551373681613, "grad_norm": 6.587494040315997, "learning_rate": 9.941031232972047e-06, "loss": 17.5591, "step": 4239 }, { "epoch": 0.07750379293326265, "grad_norm": 5.837888384091902, "learning_rate": 9.940985896436221e-06, "loss": 17.161, "step": 4240 }, { "epoch": 0.07752207212970917, "grad_norm": 7.4309429538383425, "learning_rate": 9.940940542582668e-06, "loss": 17.7261, "step": 4241 }, { "epoch": 0.07754035132615571, "grad_norm": 7.308490142334014, "learning_rate": 9.940895171411547e-06, "loss": 18.0319, "step": 4242 }, { "epoch": 0.07755863052260223, "grad_norm": 7.27763941755643, "learning_rate": 9.940849782923016e-06, "loss": 17.8745, "step": 4243 }, { "epoch": 0.07757690971904875, "grad_norm": 7.929012297932565, "learning_rate": 9.940804377117238e-06, "loss": 17.8578, "step": 4244 }, { "epoch": 0.07759518891549527, "grad_norm": 8.097441680087801, "learning_rate": 9.940758953994367e-06, "loss": 18.4698, "step": 4245 }, { "epoch": 0.0776134681119418, "grad_norm": 8.606117810919702, "learning_rate": 9.940713513554567e-06, "loss": 18.5981, "step": 4246 }, { "epoch": 0.07763174730838833, "grad_norm": 7.9802614013269615, "learning_rate": 9.940668055797993e-06, "loss": 17.9609, "step": 4247 }, { "epoch": 0.07765002650483485, "grad_norm": 6.401310771525295, "learning_rate": 9.940622580724806e-06, "loss": 17.5281, "step": 4248 }, { "epoch": 0.07766830570128137, "grad_norm": 7.765152466156165, "learning_rate": 9.940577088335167e-06, "loss": 18.4588, "step": 4249 }, { "epoch": 0.0776865848977279, "grad_norm": 11.812201013267776, "learning_rate": 9.940531578629234e-06, "loss": 18.3213, "step": 4250 }, { "epoch": 0.07770486409417442, "grad_norm": 8.107179031342193, "learning_rate": 9.940486051607164e-06, "loss": 18.1375, "step": 4251 }, { "epoch": 0.07772314329062094, "grad_norm": 7.390909636393926, "learning_rate": 9.940440507269121e-06, "loss": 18.0795, "step": 4252 }, { "epoch": 0.07774142248706747, "grad_norm": 6.608999100204548, "learning_rate": 9.940394945615262e-06, "loss": 17.4263, "step": 4253 }, { "epoch": 0.077759701683514, "grad_norm": 7.24455911996213, "learning_rate": 9.94034936664575e-06, "loss": 17.7903, "step": 4254 }, { "epoch": 0.07777798087996052, "grad_norm": 6.510455486033311, "learning_rate": 9.94030377036074e-06, "loss": 17.4657, "step": 4255 }, { "epoch": 0.07779626007640704, "grad_norm": 6.702799210893266, "learning_rate": 9.940258156760393e-06, "loss": 17.7637, "step": 4256 }, { "epoch": 0.07781453927285356, "grad_norm": 6.25184406311624, "learning_rate": 9.940212525844872e-06, "loss": 17.3767, "step": 4257 }, { "epoch": 0.07783281846930008, "grad_norm": 6.063002518150071, "learning_rate": 9.940166877614332e-06, "loss": 17.1084, "step": 4258 }, { "epoch": 0.07785109766574662, "grad_norm": 6.809055593118283, "learning_rate": 9.940121212068938e-06, "loss": 17.5896, "step": 4259 }, { "epoch": 0.07786937686219314, "grad_norm": 7.500670999454073, "learning_rate": 9.940075529208848e-06, "loss": 18.0046, "step": 4260 }, { "epoch": 0.07788765605863966, "grad_norm": 7.555990688721894, "learning_rate": 9.940029829034221e-06, "loss": 17.9302, "step": 4261 }, { "epoch": 0.07790593525508618, "grad_norm": 7.888456038024813, "learning_rate": 9.93998411154522e-06, "loss": 18.5802, "step": 4262 }, { "epoch": 0.0779242144515327, "grad_norm": 7.8178178993913745, "learning_rate": 9.939938376742e-06, "loss": 18.2034, "step": 4263 }, { "epoch": 0.07794249364797924, "grad_norm": 7.175630237951054, "learning_rate": 9.939892624624727e-06, "loss": 17.7728, "step": 4264 }, { "epoch": 0.07796077284442576, "grad_norm": 7.269642141011237, "learning_rate": 9.939846855193558e-06, "loss": 17.5007, "step": 4265 }, { "epoch": 0.07797905204087228, "grad_norm": 8.343995757845725, "learning_rate": 9.939801068448655e-06, "loss": 18.0206, "step": 4266 }, { "epoch": 0.0779973312373188, "grad_norm": 8.104335902213569, "learning_rate": 9.939755264390179e-06, "loss": 17.8248, "step": 4267 }, { "epoch": 0.07801561043376533, "grad_norm": 6.52825714404, "learning_rate": 9.939709443018288e-06, "loss": 17.5734, "step": 4268 }, { "epoch": 0.07803388963021185, "grad_norm": 7.635623842563316, "learning_rate": 9.939663604333144e-06, "loss": 18.1016, "step": 4269 }, { "epoch": 0.07805216882665839, "grad_norm": 7.086943496847928, "learning_rate": 9.939617748334908e-06, "loss": 17.8211, "step": 4270 }, { "epoch": 0.07807044802310491, "grad_norm": 6.366837535999178, "learning_rate": 9.939571875023742e-06, "loss": 17.3253, "step": 4271 }, { "epoch": 0.07808872721955143, "grad_norm": 10.72186509949408, "learning_rate": 9.939525984399803e-06, "loss": 18.0906, "step": 4272 }, { "epoch": 0.07810700641599795, "grad_norm": 8.041045026155597, "learning_rate": 9.939480076463255e-06, "loss": 17.9549, "step": 4273 }, { "epoch": 0.07812528561244447, "grad_norm": 8.001466904872732, "learning_rate": 9.939434151214258e-06, "loss": 18.0185, "step": 4274 }, { "epoch": 0.078143564808891, "grad_norm": 8.31181029522459, "learning_rate": 9.93938820865297e-06, "loss": 17.6518, "step": 4275 }, { "epoch": 0.07816184400533753, "grad_norm": 7.490995115319857, "learning_rate": 9.93934224877956e-06, "loss": 17.8578, "step": 4276 }, { "epoch": 0.07818012320178405, "grad_norm": 6.490426920828199, "learning_rate": 9.939296271594181e-06, "loss": 17.338, "step": 4277 }, { "epoch": 0.07819840239823057, "grad_norm": 7.262720232209258, "learning_rate": 9.939250277096998e-06, "loss": 17.7793, "step": 4278 }, { "epoch": 0.0782166815946771, "grad_norm": 8.556554607249234, "learning_rate": 9.939204265288172e-06, "loss": 18.791, "step": 4279 }, { "epoch": 0.07823496079112362, "grad_norm": 6.868949045187712, "learning_rate": 9.939158236167863e-06, "loss": 17.5968, "step": 4280 }, { "epoch": 0.07825323998757015, "grad_norm": 6.888572727906375, "learning_rate": 9.939112189736231e-06, "loss": 17.615, "step": 4281 }, { "epoch": 0.07827151918401667, "grad_norm": 8.452397434426997, "learning_rate": 9.939066125993442e-06, "loss": 18.4356, "step": 4282 }, { "epoch": 0.0782897983804632, "grad_norm": 7.715575386065501, "learning_rate": 9.939020044939654e-06, "loss": 18.2356, "step": 4283 }, { "epoch": 0.07830807757690972, "grad_norm": 7.778073512619908, "learning_rate": 9.93897394657503e-06, "loss": 18.0708, "step": 4284 }, { "epoch": 0.07832635677335624, "grad_norm": 6.902819822531594, "learning_rate": 9.938927830899729e-06, "loss": 17.7574, "step": 4285 }, { "epoch": 0.07834463596980276, "grad_norm": 6.658068507941403, "learning_rate": 9.938881697913917e-06, "loss": 17.5977, "step": 4286 }, { "epoch": 0.0783629151662493, "grad_norm": 7.359021623098024, "learning_rate": 9.93883554761775e-06, "loss": 17.7763, "step": 4287 }, { "epoch": 0.07838119436269582, "grad_norm": 7.15910064266547, "learning_rate": 9.938789380011396e-06, "loss": 17.4, "step": 4288 }, { "epoch": 0.07839947355914234, "grad_norm": 7.149554481888531, "learning_rate": 9.938743195095013e-06, "loss": 17.796, "step": 4289 }, { "epoch": 0.07841775275558886, "grad_norm": 6.898549445763817, "learning_rate": 9.938696992868764e-06, "loss": 17.9461, "step": 4290 }, { "epoch": 0.07843603195203538, "grad_norm": 7.216662003346921, "learning_rate": 9.93865077333281e-06, "loss": 17.7372, "step": 4291 }, { "epoch": 0.0784543111484819, "grad_norm": 8.810328140667025, "learning_rate": 9.938604536487314e-06, "loss": 17.9731, "step": 4292 }, { "epoch": 0.07847259034492844, "grad_norm": 8.580926615759612, "learning_rate": 9.938558282332438e-06, "loss": 17.9521, "step": 4293 }, { "epoch": 0.07849086954137496, "grad_norm": 7.240334581817721, "learning_rate": 9.938512010868342e-06, "loss": 17.9799, "step": 4294 }, { "epoch": 0.07850914873782149, "grad_norm": 7.753443804416742, "learning_rate": 9.938465722095192e-06, "loss": 18.105, "step": 4295 }, { "epoch": 0.07852742793426801, "grad_norm": 7.328982040101361, "learning_rate": 9.938419416013146e-06, "loss": 17.6112, "step": 4296 }, { "epoch": 0.07854570713071453, "grad_norm": 8.148839296910063, "learning_rate": 9.93837309262237e-06, "loss": 18.1112, "step": 4297 }, { "epoch": 0.07856398632716106, "grad_norm": 7.801790674173154, "learning_rate": 9.938326751923025e-06, "loss": 17.9724, "step": 4298 }, { "epoch": 0.07858226552360759, "grad_norm": 7.505775094420256, "learning_rate": 9.938280393915272e-06, "loss": 17.8813, "step": 4299 }, { "epoch": 0.07860054472005411, "grad_norm": 7.702707200415012, "learning_rate": 9.938234018599275e-06, "loss": 17.8986, "step": 4300 }, { "epoch": 0.07861882391650063, "grad_norm": 7.049950975054393, "learning_rate": 9.938187625975197e-06, "loss": 17.6547, "step": 4301 }, { "epoch": 0.07863710311294715, "grad_norm": 8.44393217676752, "learning_rate": 9.9381412160432e-06, "loss": 17.6841, "step": 4302 }, { "epoch": 0.07865538230939367, "grad_norm": 7.337931856883426, "learning_rate": 9.938094788803448e-06, "loss": 17.7211, "step": 4303 }, { "epoch": 0.07867366150584021, "grad_norm": 8.163342510925498, "learning_rate": 9.938048344256099e-06, "loss": 18.1403, "step": 4304 }, { "epoch": 0.07869194070228673, "grad_norm": 6.903131863262117, "learning_rate": 9.938001882401321e-06, "loss": 17.3792, "step": 4305 }, { "epoch": 0.07871021989873325, "grad_norm": 6.557663602936739, "learning_rate": 9.937955403239275e-06, "loss": 17.4879, "step": 4306 }, { "epoch": 0.07872849909517977, "grad_norm": 8.7068292546031, "learning_rate": 9.937908906770122e-06, "loss": 18.3442, "step": 4307 }, { "epoch": 0.0787467782916263, "grad_norm": 7.533992396360987, "learning_rate": 9.937862392994029e-06, "loss": 18.1053, "step": 4308 }, { "epoch": 0.07876505748807282, "grad_norm": 7.633646651233243, "learning_rate": 9.937815861911155e-06, "loss": 18.0692, "step": 4309 }, { "epoch": 0.07878333668451935, "grad_norm": 8.756093481318945, "learning_rate": 9.937769313521666e-06, "loss": 18.8583, "step": 4310 }, { "epoch": 0.07880161588096588, "grad_norm": 7.024380608001842, "learning_rate": 9.937722747825724e-06, "loss": 18.0651, "step": 4311 }, { "epoch": 0.0788198950774124, "grad_norm": 6.308581936823587, "learning_rate": 9.937676164823493e-06, "loss": 17.2601, "step": 4312 }, { "epoch": 0.07883817427385892, "grad_norm": 8.201208200163915, "learning_rate": 9.937629564515134e-06, "loss": 18.1754, "step": 4313 }, { "epoch": 0.07885645347030544, "grad_norm": 6.615981830198527, "learning_rate": 9.937582946900812e-06, "loss": 17.5142, "step": 4314 }, { "epoch": 0.07887473266675198, "grad_norm": 6.925562656026488, "learning_rate": 9.93753631198069e-06, "loss": 17.6064, "step": 4315 }, { "epoch": 0.0788930118631985, "grad_norm": 7.071526950789263, "learning_rate": 9.937489659754933e-06, "loss": 17.6915, "step": 4316 }, { "epoch": 0.07891129105964502, "grad_norm": 6.93320424115655, "learning_rate": 9.937442990223703e-06, "loss": 17.8624, "step": 4317 }, { "epoch": 0.07892957025609154, "grad_norm": 7.17766099048035, "learning_rate": 9.937396303387164e-06, "loss": 17.812, "step": 4318 }, { "epoch": 0.07894784945253806, "grad_norm": 6.987283203511792, "learning_rate": 9.937349599245478e-06, "loss": 17.4972, "step": 4319 }, { "epoch": 0.07896612864898458, "grad_norm": 7.177250193446535, "learning_rate": 9.937302877798811e-06, "loss": 17.7235, "step": 4320 }, { "epoch": 0.07898440784543112, "grad_norm": 5.464667511273553, "learning_rate": 9.937256139047326e-06, "loss": 17.2885, "step": 4321 }, { "epoch": 0.07900268704187764, "grad_norm": 7.365873459882448, "learning_rate": 9.937209382991187e-06, "loss": 17.5206, "step": 4322 }, { "epoch": 0.07902096623832416, "grad_norm": 7.726819290001141, "learning_rate": 9.937162609630556e-06, "loss": 18.4063, "step": 4323 }, { "epoch": 0.07903924543477069, "grad_norm": 9.092986251084202, "learning_rate": 9.937115818965599e-06, "loss": 18.6479, "step": 4324 }, { "epoch": 0.07905752463121721, "grad_norm": 8.531475571100337, "learning_rate": 9.93706901099648e-06, "loss": 18.4475, "step": 4325 }, { "epoch": 0.07907580382766373, "grad_norm": 7.452186063785753, "learning_rate": 9.937022185723363e-06, "loss": 18.0895, "step": 4326 }, { "epoch": 0.07909408302411026, "grad_norm": 7.9536205749635345, "learning_rate": 9.936975343146411e-06, "loss": 18.2044, "step": 4327 }, { "epoch": 0.07911236222055679, "grad_norm": 9.890426772928912, "learning_rate": 9.93692848326579e-06, "loss": 18.667, "step": 4328 }, { "epoch": 0.07913064141700331, "grad_norm": 6.224300772800686, "learning_rate": 9.936881606081662e-06, "loss": 17.4493, "step": 4329 }, { "epoch": 0.07914892061344983, "grad_norm": 7.671174616757518, "learning_rate": 9.93683471159419e-06, "loss": 17.6893, "step": 4330 }, { "epoch": 0.07916719980989635, "grad_norm": 7.377338604220668, "learning_rate": 9.936787799803544e-06, "loss": 17.8501, "step": 4331 }, { "epoch": 0.07918547900634289, "grad_norm": 6.423033613389302, "learning_rate": 9.936740870709885e-06, "loss": 17.4775, "step": 4332 }, { "epoch": 0.07920375820278941, "grad_norm": 9.213909832510181, "learning_rate": 9.936693924313377e-06, "loss": 18.5094, "step": 4333 }, { "epoch": 0.07922203739923593, "grad_norm": 7.030187660722593, "learning_rate": 9.936646960614185e-06, "loss": 17.8259, "step": 4334 }, { "epoch": 0.07924031659568245, "grad_norm": 6.524362985021903, "learning_rate": 9.936599979612473e-06, "loss": 17.3938, "step": 4335 }, { "epoch": 0.07925859579212897, "grad_norm": 6.345926930675069, "learning_rate": 9.936552981308408e-06, "loss": 17.2927, "step": 4336 }, { "epoch": 0.0792768749885755, "grad_norm": 8.18555463990253, "learning_rate": 9.93650596570215e-06, "loss": 17.9193, "step": 4337 }, { "epoch": 0.07929515418502203, "grad_norm": 7.172318325546454, "learning_rate": 9.936458932793872e-06, "loss": 17.9054, "step": 4338 }, { "epoch": 0.07931343338146855, "grad_norm": 7.01926924122479, "learning_rate": 9.93641188258373e-06, "loss": 17.7644, "step": 4339 }, { "epoch": 0.07933171257791508, "grad_norm": 7.223352662426126, "learning_rate": 9.936364815071894e-06, "loss": 17.6415, "step": 4340 }, { "epoch": 0.0793499917743616, "grad_norm": 6.99194164800054, "learning_rate": 9.936317730258527e-06, "loss": 17.6133, "step": 4341 }, { "epoch": 0.07936827097080812, "grad_norm": 7.2545172317223034, "learning_rate": 9.936270628143796e-06, "loss": 17.6298, "step": 4342 }, { "epoch": 0.07938655016725464, "grad_norm": 7.276928721931867, "learning_rate": 9.936223508727864e-06, "loss": 17.9755, "step": 4343 }, { "epoch": 0.07940482936370118, "grad_norm": 6.7277421681572, "learning_rate": 9.936176372010898e-06, "loss": 17.3964, "step": 4344 }, { "epoch": 0.0794231085601477, "grad_norm": 7.525938658288535, "learning_rate": 9.93612921799306e-06, "loss": 17.7782, "step": 4345 }, { "epoch": 0.07944138775659422, "grad_norm": 6.957726541270671, "learning_rate": 9.93608204667452e-06, "loss": 17.4248, "step": 4346 }, { "epoch": 0.07945966695304074, "grad_norm": 7.6856434204431885, "learning_rate": 9.936034858055439e-06, "loss": 18.1327, "step": 4347 }, { "epoch": 0.07947794614948726, "grad_norm": 6.898323366882175, "learning_rate": 9.935987652135983e-06, "loss": 17.616, "step": 4348 }, { "epoch": 0.0794962253459338, "grad_norm": 8.20052684478149, "learning_rate": 9.93594042891632e-06, "loss": 17.9773, "step": 4349 }, { "epoch": 0.07951450454238032, "grad_norm": 7.194753526930869, "learning_rate": 9.935893188396614e-06, "loss": 17.8848, "step": 4350 }, { "epoch": 0.07953278373882684, "grad_norm": 10.113227968945113, "learning_rate": 9.935845930577032e-06, "loss": 19.0136, "step": 4351 }, { "epoch": 0.07955106293527336, "grad_norm": 8.986283506326176, "learning_rate": 9.935798655457737e-06, "loss": 18.8284, "step": 4352 }, { "epoch": 0.07956934213171989, "grad_norm": 7.133922398959805, "learning_rate": 9.935751363038897e-06, "loss": 17.5253, "step": 4353 }, { "epoch": 0.07958762132816641, "grad_norm": 8.27673277270031, "learning_rate": 9.935704053320676e-06, "loss": 18.2957, "step": 4354 }, { "epoch": 0.07960590052461294, "grad_norm": 7.676831202603974, "learning_rate": 9.93565672630324e-06, "loss": 17.9769, "step": 4355 }, { "epoch": 0.07962417972105947, "grad_norm": 6.54709400823552, "learning_rate": 9.935609381986756e-06, "loss": 17.472, "step": 4356 }, { "epoch": 0.07964245891750599, "grad_norm": 7.033668989331142, "learning_rate": 9.93556202037139e-06, "loss": 17.5056, "step": 4357 }, { "epoch": 0.07966073811395251, "grad_norm": 5.438645737245922, "learning_rate": 9.935514641457307e-06, "loss": 16.8826, "step": 4358 }, { "epoch": 0.07967901731039903, "grad_norm": 7.4635302373287224, "learning_rate": 9.935467245244674e-06, "loss": 18.0071, "step": 4359 }, { "epoch": 0.07969729650684555, "grad_norm": 8.001907384704584, "learning_rate": 9.935419831733655e-06, "loss": 18.1448, "step": 4360 }, { "epoch": 0.07971557570329209, "grad_norm": 5.58895525179972, "learning_rate": 9.93537240092442e-06, "loss": 16.9925, "step": 4361 }, { "epoch": 0.07973385489973861, "grad_norm": 8.394101913852397, "learning_rate": 9.935324952817132e-06, "loss": 18.2056, "step": 4362 }, { "epoch": 0.07975213409618513, "grad_norm": 6.185491922238464, "learning_rate": 9.935277487411957e-06, "loss": 17.406, "step": 4363 }, { "epoch": 0.07977041329263165, "grad_norm": 7.82182368160327, "learning_rate": 9.935230004709064e-06, "loss": 18.0795, "step": 4364 }, { "epoch": 0.07978869248907818, "grad_norm": 7.832177300994306, "learning_rate": 9.935182504708619e-06, "loss": 17.9343, "step": 4365 }, { "epoch": 0.07980697168552471, "grad_norm": 8.041377318900343, "learning_rate": 9.935134987410786e-06, "loss": 17.9911, "step": 4366 }, { "epoch": 0.07982525088197123, "grad_norm": 8.440419909385117, "learning_rate": 9.935087452815733e-06, "loss": 18.4141, "step": 4367 }, { "epoch": 0.07984353007841775, "grad_norm": 7.705909091524087, "learning_rate": 9.935039900923627e-06, "loss": 18.129, "step": 4368 }, { "epoch": 0.07986180927486428, "grad_norm": 7.256927719180046, "learning_rate": 9.934992331734635e-06, "loss": 17.8265, "step": 4369 }, { "epoch": 0.0798800884713108, "grad_norm": 6.727981028320891, "learning_rate": 9.934944745248922e-06, "loss": 17.3921, "step": 4370 }, { "epoch": 0.07989836766775732, "grad_norm": 8.888879986522245, "learning_rate": 9.934897141466658e-06, "loss": 18.2255, "step": 4371 }, { "epoch": 0.07991664686420386, "grad_norm": 7.5446180440391295, "learning_rate": 9.934849520388006e-06, "loss": 17.8565, "step": 4372 }, { "epoch": 0.07993492606065038, "grad_norm": 7.762442570123377, "learning_rate": 9.934801882013136e-06, "loss": 17.9927, "step": 4373 }, { "epoch": 0.0799532052570969, "grad_norm": 7.63817698154924, "learning_rate": 9.934754226342212e-06, "loss": 17.7823, "step": 4374 }, { "epoch": 0.07997148445354342, "grad_norm": 7.623312193856963, "learning_rate": 9.934706553375404e-06, "loss": 17.9167, "step": 4375 }, { "epoch": 0.07998976364998994, "grad_norm": 7.403930180012222, "learning_rate": 9.934658863112877e-06, "loss": 17.728, "step": 4376 }, { "epoch": 0.08000804284643646, "grad_norm": 7.913128623022587, "learning_rate": 9.934611155554799e-06, "loss": 17.9917, "step": 4377 }, { "epoch": 0.080026322042883, "grad_norm": 7.8683142771557, "learning_rate": 9.934563430701336e-06, "loss": 17.7655, "step": 4378 }, { "epoch": 0.08004460123932952, "grad_norm": 8.258553859426897, "learning_rate": 9.934515688552656e-06, "loss": 18.4457, "step": 4379 }, { "epoch": 0.08006288043577604, "grad_norm": 6.9581842199049735, "learning_rate": 9.934467929108931e-06, "loss": 17.6692, "step": 4380 }, { "epoch": 0.08008115963222256, "grad_norm": 6.791523737572199, "learning_rate": 9.934420152370321e-06, "loss": 17.255, "step": 4381 }, { "epoch": 0.08009943882866909, "grad_norm": 8.517797819825347, "learning_rate": 9.934372358336996e-06, "loss": 18.1414, "step": 4382 }, { "epoch": 0.08011771802511562, "grad_norm": 7.555933529906602, "learning_rate": 9.934324547009125e-06, "loss": 17.8544, "step": 4383 }, { "epoch": 0.08013599722156214, "grad_norm": 7.502407572634842, "learning_rate": 9.934276718386874e-06, "loss": 17.6756, "step": 4384 }, { "epoch": 0.08015427641800867, "grad_norm": 8.479011498340665, "learning_rate": 9.934228872470413e-06, "loss": 18.5501, "step": 4385 }, { "epoch": 0.08017255561445519, "grad_norm": 7.359950844823468, "learning_rate": 9.934181009259907e-06, "loss": 17.9449, "step": 4386 }, { "epoch": 0.08019083481090171, "grad_norm": 8.057010421147657, "learning_rate": 9.934133128755524e-06, "loss": 17.7369, "step": 4387 }, { "epoch": 0.08020911400734823, "grad_norm": 7.298814909341879, "learning_rate": 9.934085230957434e-06, "loss": 17.8067, "step": 4388 }, { "epoch": 0.08022739320379477, "grad_norm": 6.168865086056675, "learning_rate": 9.934037315865802e-06, "loss": 17.2977, "step": 4389 }, { "epoch": 0.08024567240024129, "grad_norm": 8.712264724286051, "learning_rate": 9.933989383480797e-06, "loss": 18.6987, "step": 4390 }, { "epoch": 0.08026395159668781, "grad_norm": 8.715454270171263, "learning_rate": 9.93394143380259e-06, "loss": 18.5641, "step": 4391 }, { "epoch": 0.08028223079313433, "grad_norm": 6.871268595151202, "learning_rate": 9.933893466831344e-06, "loss": 17.7549, "step": 4392 }, { "epoch": 0.08030050998958085, "grad_norm": 6.50455131844434, "learning_rate": 9.933845482567231e-06, "loss": 17.4704, "step": 4393 }, { "epoch": 0.08031878918602738, "grad_norm": 7.495903605026569, "learning_rate": 9.933797481010417e-06, "loss": 17.7965, "step": 4394 }, { "epoch": 0.08033706838247391, "grad_norm": 7.308092299369829, "learning_rate": 9.93374946216107e-06, "loss": 17.8812, "step": 4395 }, { "epoch": 0.08035534757892043, "grad_norm": 8.936969907894749, "learning_rate": 9.93370142601936e-06, "loss": 18.3376, "step": 4396 }, { "epoch": 0.08037362677536695, "grad_norm": 7.012556279337693, "learning_rate": 9.933653372585454e-06, "loss": 17.6605, "step": 4397 }, { "epoch": 0.08039190597181348, "grad_norm": 8.659173418122123, "learning_rate": 9.933605301859523e-06, "loss": 18.6341, "step": 4398 }, { "epoch": 0.08041018516826, "grad_norm": 5.968503921611904, "learning_rate": 9.933557213841733e-06, "loss": 17.2194, "step": 4399 }, { "epoch": 0.08042846436470653, "grad_norm": 7.426403281013527, "learning_rate": 9.933509108532251e-06, "loss": 17.8206, "step": 4400 }, { "epoch": 0.08044674356115306, "grad_norm": 8.197047539262414, "learning_rate": 9.93346098593125e-06, "loss": 18.3993, "step": 4401 }, { "epoch": 0.08046502275759958, "grad_norm": 7.065256372234396, "learning_rate": 9.933412846038896e-06, "loss": 17.9417, "step": 4402 }, { "epoch": 0.0804833019540461, "grad_norm": 6.907884075342692, "learning_rate": 9.933364688855357e-06, "loss": 17.8059, "step": 4403 }, { "epoch": 0.08050158115049262, "grad_norm": 7.340969901260953, "learning_rate": 9.933316514380804e-06, "loss": 17.6631, "step": 4404 }, { "epoch": 0.08051986034693914, "grad_norm": 7.9374478532920465, "learning_rate": 9.933268322615403e-06, "loss": 18.5271, "step": 4405 }, { "epoch": 0.08053813954338568, "grad_norm": 7.4468071554846444, "learning_rate": 9.933220113559327e-06, "loss": 17.822, "step": 4406 }, { "epoch": 0.0805564187398322, "grad_norm": 6.920858580541004, "learning_rate": 9.93317188721274e-06, "loss": 17.5771, "step": 4407 }, { "epoch": 0.08057469793627872, "grad_norm": 7.114072198051114, "learning_rate": 9.933123643575816e-06, "loss": 17.8293, "step": 4408 }, { "epoch": 0.08059297713272524, "grad_norm": 6.841327818725455, "learning_rate": 9.933075382648721e-06, "loss": 17.4228, "step": 4409 }, { "epoch": 0.08061125632917177, "grad_norm": 7.100727522736129, "learning_rate": 9.933027104431624e-06, "loss": 17.6061, "step": 4410 }, { "epoch": 0.08062953552561829, "grad_norm": 9.30397315079259, "learning_rate": 9.932978808924696e-06, "loss": 18.1698, "step": 4411 }, { "epoch": 0.08064781472206482, "grad_norm": 6.896796777753139, "learning_rate": 9.932930496128105e-06, "loss": 17.8227, "step": 4412 }, { "epoch": 0.08066609391851134, "grad_norm": 7.270965623141832, "learning_rate": 9.93288216604202e-06, "loss": 17.7673, "step": 4413 }, { "epoch": 0.08068437311495787, "grad_norm": 8.296988554740144, "learning_rate": 9.932833818666611e-06, "loss": 18.2694, "step": 4414 }, { "epoch": 0.08070265231140439, "grad_norm": 8.778225644823923, "learning_rate": 9.93278545400205e-06, "loss": 18.4442, "step": 4415 }, { "epoch": 0.08072093150785091, "grad_norm": 7.208020527454145, "learning_rate": 9.932737072048502e-06, "loss": 17.9922, "step": 4416 }, { "epoch": 0.08073921070429745, "grad_norm": 7.010547771327252, "learning_rate": 9.932688672806138e-06, "loss": 17.7569, "step": 4417 }, { "epoch": 0.08075748990074397, "grad_norm": 7.83570712937895, "learning_rate": 9.93264025627513e-06, "loss": 17.9074, "step": 4418 }, { "epoch": 0.08077576909719049, "grad_norm": 8.072153380337696, "learning_rate": 9.932591822455643e-06, "loss": 18.5003, "step": 4419 }, { "epoch": 0.08079404829363701, "grad_norm": 7.838063096096014, "learning_rate": 9.932543371347853e-06, "loss": 18.0529, "step": 4420 }, { "epoch": 0.08081232749008353, "grad_norm": 7.6052270885843045, "learning_rate": 9.932494902951925e-06, "loss": 18.0015, "step": 4421 }, { "epoch": 0.08083060668653005, "grad_norm": 7.646716818032306, "learning_rate": 9.932446417268031e-06, "loss": 17.9335, "step": 4422 }, { "epoch": 0.08084888588297659, "grad_norm": 6.74951305218201, "learning_rate": 9.93239791429634e-06, "loss": 17.895, "step": 4423 }, { "epoch": 0.08086716507942311, "grad_norm": 7.638016311406168, "learning_rate": 9.932349394037022e-06, "loss": 17.9338, "step": 4424 }, { "epoch": 0.08088544427586963, "grad_norm": 6.775714397132939, "learning_rate": 9.932300856490248e-06, "loss": 17.6984, "step": 4425 }, { "epoch": 0.08090372347231616, "grad_norm": 6.715265997146286, "learning_rate": 9.932252301656187e-06, "loss": 17.8797, "step": 4426 }, { "epoch": 0.08092200266876268, "grad_norm": 6.708322945726105, "learning_rate": 9.932203729535011e-06, "loss": 17.7322, "step": 4427 }, { "epoch": 0.0809402818652092, "grad_norm": 7.674689554676557, "learning_rate": 9.932155140126886e-06, "loss": 18.0179, "step": 4428 }, { "epoch": 0.08095856106165573, "grad_norm": 6.0953774890285946, "learning_rate": 9.932106533431988e-06, "loss": 17.266, "step": 4429 }, { "epoch": 0.08097684025810226, "grad_norm": 8.123767572496034, "learning_rate": 9.932057909450483e-06, "loss": 17.9947, "step": 4430 }, { "epoch": 0.08099511945454878, "grad_norm": 8.007848703055455, "learning_rate": 9.932009268182542e-06, "loss": 18.0589, "step": 4431 }, { "epoch": 0.0810133986509953, "grad_norm": 7.728613347671448, "learning_rate": 9.93196060962834e-06, "loss": 18.0519, "step": 4432 }, { "epoch": 0.08103167784744182, "grad_norm": 6.914287168216555, "learning_rate": 9.931911933788043e-06, "loss": 17.6688, "step": 4433 }, { "epoch": 0.08104995704388836, "grad_norm": 6.594437207858085, "learning_rate": 9.931863240661821e-06, "loss": 17.6638, "step": 4434 }, { "epoch": 0.08106823624033488, "grad_norm": 6.6198252529318395, "learning_rate": 9.931814530249847e-06, "loss": 17.5418, "step": 4435 }, { "epoch": 0.0810865154367814, "grad_norm": 11.144267892914211, "learning_rate": 9.93176580255229e-06, "loss": 18.2372, "step": 4436 }, { "epoch": 0.08110479463322792, "grad_norm": 7.014288301350778, "learning_rate": 9.931717057569323e-06, "loss": 17.6551, "step": 4437 }, { "epoch": 0.08112307382967444, "grad_norm": 7.318544846682362, "learning_rate": 9.931668295301116e-06, "loss": 17.9492, "step": 4438 }, { "epoch": 0.08114135302612097, "grad_norm": 10.872672173263489, "learning_rate": 9.931619515747838e-06, "loss": 17.9141, "step": 4439 }, { "epoch": 0.0811596322225675, "grad_norm": 6.240998164763673, "learning_rate": 9.931570718909664e-06, "loss": 17.1257, "step": 4440 }, { "epoch": 0.08117791141901402, "grad_norm": 7.99350319505303, "learning_rate": 9.931521904786761e-06, "loss": 17.9102, "step": 4441 }, { "epoch": 0.08119619061546055, "grad_norm": 6.712015008086295, "learning_rate": 9.931473073379304e-06, "loss": 17.6588, "step": 4442 }, { "epoch": 0.08121446981190707, "grad_norm": 7.902877742430194, "learning_rate": 9.931424224687458e-06, "loss": 18.1366, "step": 4443 }, { "epoch": 0.08123274900835359, "grad_norm": 7.78949894709229, "learning_rate": 9.9313753587114e-06, "loss": 18.3117, "step": 4444 }, { "epoch": 0.08125102820480011, "grad_norm": 7.1016311906620855, "learning_rate": 9.9313264754513e-06, "loss": 17.7372, "step": 4445 }, { "epoch": 0.08126930740124665, "grad_norm": 6.478915491179522, "learning_rate": 9.931277574907328e-06, "loss": 17.4951, "step": 4446 }, { "epoch": 0.08128758659769317, "grad_norm": 8.509384946617994, "learning_rate": 9.931228657079655e-06, "loss": 18.6333, "step": 4447 }, { "epoch": 0.08130586579413969, "grad_norm": 7.650938902341948, "learning_rate": 9.931179721968453e-06, "loss": 17.7259, "step": 4448 }, { "epoch": 0.08132414499058621, "grad_norm": 7.7217687624076765, "learning_rate": 9.931130769573896e-06, "loss": 18.0419, "step": 4449 }, { "epoch": 0.08134242418703273, "grad_norm": 8.289211316693782, "learning_rate": 9.931081799896153e-06, "loss": 18.0502, "step": 4450 }, { "epoch": 0.08136070338347927, "grad_norm": 7.714887931684799, "learning_rate": 9.931032812935397e-06, "loss": 17.8473, "step": 4451 }, { "epoch": 0.08137898257992579, "grad_norm": 7.736070745534125, "learning_rate": 9.930983808691796e-06, "loss": 18.1318, "step": 4452 }, { "epoch": 0.08139726177637231, "grad_norm": 6.1809140483148965, "learning_rate": 9.930934787165527e-06, "loss": 17.2089, "step": 4453 }, { "epoch": 0.08141554097281883, "grad_norm": 6.949007285441479, "learning_rate": 9.930885748356759e-06, "loss": 17.5245, "step": 4454 }, { "epoch": 0.08143382016926536, "grad_norm": 6.645321717408077, "learning_rate": 9.930836692265664e-06, "loss": 17.6439, "step": 4455 }, { "epoch": 0.08145209936571188, "grad_norm": 6.932555337897875, "learning_rate": 9.930787618892415e-06, "loss": 17.7075, "step": 4456 }, { "epoch": 0.08147037856215841, "grad_norm": 7.62905891946172, "learning_rate": 9.930738528237183e-06, "loss": 17.7215, "step": 4457 }, { "epoch": 0.08148865775860493, "grad_norm": 6.926128597480996, "learning_rate": 9.93068942030014e-06, "loss": 17.4867, "step": 4458 }, { "epoch": 0.08150693695505146, "grad_norm": 6.5618999252141545, "learning_rate": 9.930640295081458e-06, "loss": 17.3902, "step": 4459 }, { "epoch": 0.08152521615149798, "grad_norm": 6.971387561714625, "learning_rate": 9.93059115258131e-06, "loss": 17.6631, "step": 4460 }, { "epoch": 0.0815434953479445, "grad_norm": 7.164499672355387, "learning_rate": 9.930541992799868e-06, "loss": 17.5324, "step": 4461 }, { "epoch": 0.08156177454439102, "grad_norm": 8.462225980434976, "learning_rate": 9.930492815737305e-06, "loss": 18.4413, "step": 4462 }, { "epoch": 0.08158005374083756, "grad_norm": 7.348675859790189, "learning_rate": 9.930443621393792e-06, "loss": 17.9459, "step": 4463 }, { "epoch": 0.08159833293728408, "grad_norm": 5.875306457033801, "learning_rate": 9.9303944097695e-06, "loss": 17.3848, "step": 4464 }, { "epoch": 0.0816166121337306, "grad_norm": 6.196416695570449, "learning_rate": 9.930345180864606e-06, "loss": 17.3206, "step": 4465 }, { "epoch": 0.08163489133017712, "grad_norm": 6.784910250618494, "learning_rate": 9.93029593467928e-06, "loss": 17.3085, "step": 4466 }, { "epoch": 0.08165317052662364, "grad_norm": 7.625413552891611, "learning_rate": 9.930246671213693e-06, "loss": 18.3514, "step": 4467 }, { "epoch": 0.08167144972307018, "grad_norm": 5.547533240305542, "learning_rate": 9.93019739046802e-06, "loss": 17.0815, "step": 4468 }, { "epoch": 0.0816897289195167, "grad_norm": 7.210661771640025, "learning_rate": 9.930148092442433e-06, "loss": 17.8987, "step": 4469 }, { "epoch": 0.08170800811596322, "grad_norm": 8.65273559111609, "learning_rate": 9.930098777137104e-06, "loss": 18.4925, "step": 4470 }, { "epoch": 0.08172628731240975, "grad_norm": 8.22570860272434, "learning_rate": 9.930049444552207e-06, "loss": 18.1606, "step": 4471 }, { "epoch": 0.08174456650885627, "grad_norm": 7.6859657155698065, "learning_rate": 9.930000094687916e-06, "loss": 17.8538, "step": 4472 }, { "epoch": 0.08176284570530279, "grad_norm": 7.92076233119735, "learning_rate": 9.929950727544401e-06, "loss": 18.4207, "step": 4473 }, { "epoch": 0.08178112490174932, "grad_norm": 7.3580475160391945, "learning_rate": 9.929901343121838e-06, "loss": 17.9415, "step": 4474 }, { "epoch": 0.08179940409819585, "grad_norm": 9.13179251316065, "learning_rate": 9.929851941420396e-06, "loss": 18.4544, "step": 4475 }, { "epoch": 0.08181768329464237, "grad_norm": 7.910382887037255, "learning_rate": 9.929802522440252e-06, "loss": 18.353, "step": 4476 }, { "epoch": 0.08183596249108889, "grad_norm": 6.534012703103533, "learning_rate": 9.929753086181578e-06, "loss": 17.5061, "step": 4477 }, { "epoch": 0.08185424168753541, "grad_norm": 7.269820604266411, "learning_rate": 9.929703632644547e-06, "loss": 17.5173, "step": 4478 }, { "epoch": 0.08187252088398193, "grad_norm": 6.758277820192978, "learning_rate": 9.929654161829333e-06, "loss": 17.5249, "step": 4479 }, { "epoch": 0.08189080008042847, "grad_norm": 6.214356646569967, "learning_rate": 9.929604673736108e-06, "loss": 17.3733, "step": 4480 }, { "epoch": 0.08190907927687499, "grad_norm": 6.651755790691796, "learning_rate": 9.929555168365048e-06, "loss": 17.4953, "step": 4481 }, { "epoch": 0.08192735847332151, "grad_norm": 5.65275080863377, "learning_rate": 9.929505645716323e-06, "loss": 17.0668, "step": 4482 }, { "epoch": 0.08194563766976803, "grad_norm": 7.818691995099381, "learning_rate": 9.92945610579011e-06, "loss": 17.8689, "step": 4483 }, { "epoch": 0.08196391686621456, "grad_norm": 6.846748702401642, "learning_rate": 9.92940654858658e-06, "loss": 17.832, "step": 4484 }, { "epoch": 0.08198219606266109, "grad_norm": 6.774227618839608, "learning_rate": 9.929356974105909e-06, "loss": 17.4402, "step": 4485 }, { "epoch": 0.08200047525910761, "grad_norm": 7.18282719284545, "learning_rate": 9.92930738234827e-06, "loss": 17.7953, "step": 4486 }, { "epoch": 0.08201875445555414, "grad_norm": 7.10034438045775, "learning_rate": 9.929257773313834e-06, "loss": 17.7903, "step": 4487 }, { "epoch": 0.08203703365200066, "grad_norm": 8.918489618016627, "learning_rate": 9.92920814700278e-06, "loss": 18.1086, "step": 4488 }, { "epoch": 0.08205531284844718, "grad_norm": 7.117493121501945, "learning_rate": 9.929158503415277e-06, "loss": 17.6571, "step": 4489 }, { "epoch": 0.0820735920448937, "grad_norm": 8.701853632068818, "learning_rate": 9.929108842551502e-06, "loss": 18.5493, "step": 4490 }, { "epoch": 0.08209187124134024, "grad_norm": 7.033729075683853, "learning_rate": 9.929059164411627e-06, "loss": 17.6714, "step": 4491 }, { "epoch": 0.08211015043778676, "grad_norm": 7.01506236368206, "learning_rate": 9.92900946899583e-06, "loss": 17.5937, "step": 4492 }, { "epoch": 0.08212842963423328, "grad_norm": 7.028807544280329, "learning_rate": 9.92895975630428e-06, "loss": 17.6652, "step": 4493 }, { "epoch": 0.0821467088306798, "grad_norm": 6.729522681157575, "learning_rate": 9.928910026337154e-06, "loss": 17.4521, "step": 4494 }, { "epoch": 0.08216498802712632, "grad_norm": 8.344437666062118, "learning_rate": 9.928860279094628e-06, "loss": 17.9272, "step": 4495 }, { "epoch": 0.08218326722357285, "grad_norm": 6.37483648731223, "learning_rate": 9.928810514576874e-06, "loss": 17.2194, "step": 4496 }, { "epoch": 0.08220154642001938, "grad_norm": 6.102383968868863, "learning_rate": 9.928760732784067e-06, "loss": 17.4369, "step": 4497 }, { "epoch": 0.0822198256164659, "grad_norm": 6.770251894115572, "learning_rate": 9.928710933716379e-06, "loss": 17.6148, "step": 4498 }, { "epoch": 0.08223810481291242, "grad_norm": 7.365270009701503, "learning_rate": 9.92866111737399e-06, "loss": 17.896, "step": 4499 }, { "epoch": 0.08225638400935895, "grad_norm": 9.041320640724681, "learning_rate": 9.928611283757068e-06, "loss": 18.3787, "step": 4500 }, { "epoch": 0.08227466320580547, "grad_norm": 7.341942300548844, "learning_rate": 9.928561432865793e-06, "loss": 17.7244, "step": 4501 }, { "epoch": 0.082292942402252, "grad_norm": 9.128635582132091, "learning_rate": 9.928511564700339e-06, "loss": 18.6112, "step": 4502 }, { "epoch": 0.08231122159869853, "grad_norm": 8.78146881450675, "learning_rate": 9.928461679260877e-06, "loss": 18.7338, "step": 4503 }, { "epoch": 0.08232950079514505, "grad_norm": 8.036743843156453, "learning_rate": 9.928411776547587e-06, "loss": 17.936, "step": 4504 }, { "epoch": 0.08234777999159157, "grad_norm": 7.310140149812051, "learning_rate": 9.92836185656064e-06, "loss": 17.5116, "step": 4505 }, { "epoch": 0.08236605918803809, "grad_norm": 7.1757266381119, "learning_rate": 9.928311919300214e-06, "loss": 17.4799, "step": 4506 }, { "epoch": 0.08238433838448461, "grad_norm": 7.604737458119529, "learning_rate": 9.928261964766481e-06, "loss": 18.138, "step": 4507 }, { "epoch": 0.08240261758093115, "grad_norm": 8.247450984783418, "learning_rate": 9.928211992959617e-06, "loss": 18.3907, "step": 4508 }, { "epoch": 0.08242089677737767, "grad_norm": 7.957011173756499, "learning_rate": 9.928162003879797e-06, "loss": 18.2793, "step": 4509 }, { "epoch": 0.08243917597382419, "grad_norm": 6.8367147643001385, "learning_rate": 9.9281119975272e-06, "loss": 17.653, "step": 4510 }, { "epoch": 0.08245745517027071, "grad_norm": 7.324051785613831, "learning_rate": 9.928061973901995e-06, "loss": 17.7182, "step": 4511 }, { "epoch": 0.08247573436671723, "grad_norm": 6.355023708620587, "learning_rate": 9.928011933004363e-06, "loss": 17.3312, "step": 4512 }, { "epoch": 0.08249401356316376, "grad_norm": 7.022166479823654, "learning_rate": 9.927961874834473e-06, "loss": 17.8033, "step": 4513 }, { "epoch": 0.08251229275961029, "grad_norm": 6.956236792848833, "learning_rate": 9.927911799392506e-06, "loss": 17.7529, "step": 4514 }, { "epoch": 0.08253057195605681, "grad_norm": 8.524558072843963, "learning_rate": 9.927861706678638e-06, "loss": 18.4426, "step": 4515 }, { "epoch": 0.08254885115250334, "grad_norm": 7.456906784203291, "learning_rate": 9.92781159669304e-06, "loss": 18.0318, "step": 4516 }, { "epoch": 0.08256713034894986, "grad_norm": 6.328814492522303, "learning_rate": 9.92776146943589e-06, "loss": 17.3499, "step": 4517 }, { "epoch": 0.08258540954539638, "grad_norm": 6.642606672615187, "learning_rate": 9.927711324907366e-06, "loss": 17.5684, "step": 4518 }, { "epoch": 0.08260368874184291, "grad_norm": 7.2954811864296865, "learning_rate": 9.927661163107639e-06, "loss": 18.0425, "step": 4519 }, { "epoch": 0.08262196793828944, "grad_norm": 7.460025589824433, "learning_rate": 9.927610984036888e-06, "loss": 17.6947, "step": 4520 }, { "epoch": 0.08264024713473596, "grad_norm": 8.381247810702762, "learning_rate": 9.927560787695288e-06, "loss": 18.1759, "step": 4521 }, { "epoch": 0.08265852633118248, "grad_norm": 5.7085072531084355, "learning_rate": 9.927510574083014e-06, "loss": 17.1358, "step": 4522 }, { "epoch": 0.082676805527629, "grad_norm": 7.967265722628898, "learning_rate": 9.927460343200245e-06, "loss": 18.0888, "step": 4523 }, { "epoch": 0.08269508472407552, "grad_norm": 6.815552621381163, "learning_rate": 9.927410095047154e-06, "loss": 17.6987, "step": 4524 }, { "epoch": 0.08271336392052206, "grad_norm": 7.394571511538061, "learning_rate": 9.92735982962392e-06, "loss": 17.7943, "step": 4525 }, { "epoch": 0.08273164311696858, "grad_norm": 9.67717658562339, "learning_rate": 9.927309546930714e-06, "loss": 18.2716, "step": 4526 }, { "epoch": 0.0827499223134151, "grad_norm": 8.130599036793969, "learning_rate": 9.927259246967718e-06, "loss": 18.3201, "step": 4527 }, { "epoch": 0.08276820150986162, "grad_norm": 7.964373664485016, "learning_rate": 9.927208929735105e-06, "loss": 18.0592, "step": 4528 }, { "epoch": 0.08278648070630815, "grad_norm": 7.2245216886891495, "learning_rate": 9.927158595233053e-06, "loss": 17.7279, "step": 4529 }, { "epoch": 0.08280475990275467, "grad_norm": 7.0316490672981695, "learning_rate": 9.927108243461737e-06, "loss": 17.7399, "step": 4530 }, { "epoch": 0.0828230390992012, "grad_norm": 7.761029426910646, "learning_rate": 9.927057874421335e-06, "loss": 17.9445, "step": 4531 }, { "epoch": 0.08284131829564773, "grad_norm": 5.900261226061749, "learning_rate": 9.927007488112022e-06, "loss": 17.4243, "step": 4532 }, { "epoch": 0.08285959749209425, "grad_norm": 8.414529587707785, "learning_rate": 9.926957084533975e-06, "loss": 17.9991, "step": 4533 }, { "epoch": 0.08287787668854077, "grad_norm": 7.605849402215815, "learning_rate": 9.926906663687371e-06, "loss": 17.9538, "step": 4534 }, { "epoch": 0.08289615588498729, "grad_norm": 6.576826027627958, "learning_rate": 9.926856225572388e-06, "loss": 17.4712, "step": 4535 }, { "epoch": 0.08291443508143383, "grad_norm": 6.501336675316235, "learning_rate": 9.9268057701892e-06, "loss": 17.5895, "step": 4536 }, { "epoch": 0.08293271427788035, "grad_norm": 6.672729074948429, "learning_rate": 9.926755297537985e-06, "loss": 17.6235, "step": 4537 }, { "epoch": 0.08295099347432687, "grad_norm": 7.931759467513436, "learning_rate": 9.92670480761892e-06, "loss": 17.9637, "step": 4538 }, { "epoch": 0.08296927267077339, "grad_norm": 6.402769235168133, "learning_rate": 9.926654300432185e-06, "loss": 17.4368, "step": 4539 }, { "epoch": 0.08298755186721991, "grad_norm": 7.409572920926637, "learning_rate": 9.926603775977953e-06, "loss": 17.9268, "step": 4540 }, { "epoch": 0.08300583106366644, "grad_norm": 7.917990749667958, "learning_rate": 9.926553234256401e-06, "loss": 18.1149, "step": 4541 }, { "epoch": 0.08302411026011297, "grad_norm": 8.082940647734613, "learning_rate": 9.92650267526771e-06, "loss": 18.0548, "step": 4542 }, { "epoch": 0.08304238945655949, "grad_norm": 7.277137111479075, "learning_rate": 9.92645209901205e-06, "loss": 17.7861, "step": 4543 }, { "epoch": 0.08306066865300601, "grad_norm": 7.4733012873198685, "learning_rate": 9.926401505489606e-06, "loss": 18.0153, "step": 4544 }, { "epoch": 0.08307894784945254, "grad_norm": 7.9215653230932395, "learning_rate": 9.926350894700552e-06, "loss": 17.9856, "step": 4545 }, { "epoch": 0.08309722704589906, "grad_norm": 7.791247708572704, "learning_rate": 9.926300266645066e-06, "loss": 18.0828, "step": 4546 }, { "epoch": 0.08311550624234558, "grad_norm": 5.959070382662814, "learning_rate": 9.926249621323325e-06, "loss": 17.1554, "step": 4547 }, { "epoch": 0.08313378543879212, "grad_norm": 8.26889967906215, "learning_rate": 9.926198958735505e-06, "loss": 18.0522, "step": 4548 }, { "epoch": 0.08315206463523864, "grad_norm": 7.152983459665711, "learning_rate": 9.926148278881787e-06, "loss": 17.669, "step": 4549 }, { "epoch": 0.08317034383168516, "grad_norm": 8.371490963724002, "learning_rate": 9.926097581762345e-06, "loss": 18.0951, "step": 4550 }, { "epoch": 0.08318862302813168, "grad_norm": 6.357697406914699, "learning_rate": 9.92604686737736e-06, "loss": 17.2828, "step": 4551 }, { "epoch": 0.0832069022245782, "grad_norm": 7.768501300521136, "learning_rate": 9.925996135727008e-06, "loss": 17.9727, "step": 4552 }, { "epoch": 0.08322518142102474, "grad_norm": 9.06060336298556, "learning_rate": 9.925945386811465e-06, "loss": 18.6941, "step": 4553 }, { "epoch": 0.08324346061747126, "grad_norm": 7.674919283870002, "learning_rate": 9.925894620630913e-06, "loss": 17.8699, "step": 4554 }, { "epoch": 0.08326173981391778, "grad_norm": 7.289462358372745, "learning_rate": 9.925843837185527e-06, "loss": 17.9227, "step": 4555 }, { "epoch": 0.0832800190103643, "grad_norm": 7.090399647235003, "learning_rate": 9.925793036475487e-06, "loss": 17.5483, "step": 4556 }, { "epoch": 0.08329829820681083, "grad_norm": 7.85372437426703, "learning_rate": 9.925742218500968e-06, "loss": 18.0304, "step": 4557 }, { "epoch": 0.08331657740325735, "grad_norm": 7.387644049768619, "learning_rate": 9.92569138326215e-06, "loss": 17.8261, "step": 4558 }, { "epoch": 0.08333485659970388, "grad_norm": 8.319402741482605, "learning_rate": 9.925640530759213e-06, "loss": 18.2823, "step": 4559 }, { "epoch": 0.0833531357961504, "grad_norm": 7.572417281552823, "learning_rate": 9.925589660992332e-06, "loss": 17.8767, "step": 4560 }, { "epoch": 0.08337141499259693, "grad_norm": 8.27022005659377, "learning_rate": 9.925538773961687e-06, "loss": 18.3284, "step": 4561 }, { "epoch": 0.08338969418904345, "grad_norm": 7.35226779602324, "learning_rate": 9.925487869667456e-06, "loss": 17.6777, "step": 4562 }, { "epoch": 0.08340797338548997, "grad_norm": 8.234002551773756, "learning_rate": 9.925436948109817e-06, "loss": 17.6783, "step": 4563 }, { "epoch": 0.08342625258193649, "grad_norm": 8.303573018991921, "learning_rate": 9.92538600928895e-06, "loss": 18.1547, "step": 4564 }, { "epoch": 0.08344453177838303, "grad_norm": 7.785953917779978, "learning_rate": 9.925335053205032e-06, "loss": 18.1239, "step": 4565 }, { "epoch": 0.08346281097482955, "grad_norm": 6.477639244898419, "learning_rate": 9.925284079858242e-06, "loss": 17.4685, "step": 4566 }, { "epoch": 0.08348109017127607, "grad_norm": 7.880983959901846, "learning_rate": 9.925233089248758e-06, "loss": 17.9942, "step": 4567 }, { "epoch": 0.08349936936772259, "grad_norm": 7.570587483189525, "learning_rate": 9.92518208137676e-06, "loss": 17.9259, "step": 4568 }, { "epoch": 0.08351764856416911, "grad_norm": 6.457233257151615, "learning_rate": 9.925131056242426e-06, "loss": 17.7171, "step": 4569 }, { "epoch": 0.08353592776061565, "grad_norm": 5.575333496408933, "learning_rate": 9.925080013845935e-06, "loss": 17.086, "step": 4570 }, { "epoch": 0.08355420695706217, "grad_norm": 6.370758261416243, "learning_rate": 9.925028954187466e-06, "loss": 17.4101, "step": 4571 }, { "epoch": 0.0835724861535087, "grad_norm": 7.532208336609889, "learning_rate": 9.924977877267198e-06, "loss": 17.9726, "step": 4572 }, { "epoch": 0.08359076534995522, "grad_norm": 7.79379453646087, "learning_rate": 9.924926783085309e-06, "loss": 17.7622, "step": 4573 }, { "epoch": 0.08360904454640174, "grad_norm": 7.349686634113788, "learning_rate": 9.92487567164198e-06, "loss": 17.5063, "step": 4574 }, { "epoch": 0.08362732374284826, "grad_norm": 7.145670600779787, "learning_rate": 9.92482454293739e-06, "loss": 17.713, "step": 4575 }, { "epoch": 0.0836456029392948, "grad_norm": 7.5937590159500505, "learning_rate": 9.924773396971716e-06, "loss": 18.2291, "step": 4576 }, { "epoch": 0.08366388213574132, "grad_norm": 6.435686012792753, "learning_rate": 9.924722233745139e-06, "loss": 17.3644, "step": 4577 }, { "epoch": 0.08368216133218784, "grad_norm": 6.725851490081413, "learning_rate": 9.924671053257838e-06, "loss": 17.9066, "step": 4578 }, { "epoch": 0.08370044052863436, "grad_norm": 8.07687328527373, "learning_rate": 9.924619855509992e-06, "loss": 18.215, "step": 4579 }, { "epoch": 0.08371871972508088, "grad_norm": 7.684491657654788, "learning_rate": 9.92456864050178e-06, "loss": 18.1529, "step": 4580 }, { "epoch": 0.0837369989215274, "grad_norm": 6.807024820501327, "learning_rate": 9.924517408233385e-06, "loss": 17.4883, "step": 4581 }, { "epoch": 0.08375527811797394, "grad_norm": 7.203634154142472, "learning_rate": 9.92446615870498e-06, "loss": 18.0135, "step": 4582 }, { "epoch": 0.08377355731442046, "grad_norm": 6.9425479112905855, "learning_rate": 9.924414891916752e-06, "loss": 17.6753, "step": 4583 }, { "epoch": 0.08379183651086698, "grad_norm": 10.335705400356025, "learning_rate": 9.924363607868875e-06, "loss": 18.7902, "step": 4584 }, { "epoch": 0.0838101157073135, "grad_norm": 6.551381299515295, "learning_rate": 9.92431230656153e-06, "loss": 17.2856, "step": 4585 }, { "epoch": 0.08382839490376003, "grad_norm": 6.193330792540202, "learning_rate": 9.9242609879949e-06, "loss": 17.3872, "step": 4586 }, { "epoch": 0.08384667410020656, "grad_norm": 7.093456749267061, "learning_rate": 9.92420965216916e-06, "loss": 17.6227, "step": 4587 }, { "epoch": 0.08386495329665308, "grad_norm": 6.386760426569066, "learning_rate": 9.924158299084497e-06, "loss": 17.5053, "step": 4588 }, { "epoch": 0.0838832324930996, "grad_norm": 7.250432946564252, "learning_rate": 9.924106928741081e-06, "loss": 17.8459, "step": 4589 }, { "epoch": 0.08390151168954613, "grad_norm": 8.420710002988253, "learning_rate": 9.924055541139101e-06, "loss": 18.6423, "step": 4590 }, { "epoch": 0.08391979088599265, "grad_norm": 7.091370597423831, "learning_rate": 9.924004136278734e-06, "loss": 17.7346, "step": 4591 }, { "epoch": 0.08393807008243917, "grad_norm": 7.407183416785795, "learning_rate": 9.923952714160158e-06, "loss": 17.9074, "step": 4592 }, { "epoch": 0.0839563492788857, "grad_norm": 6.229878431683514, "learning_rate": 9.923901274783556e-06, "loss": 17.4906, "step": 4593 }, { "epoch": 0.08397462847533223, "grad_norm": 6.24420542102906, "learning_rate": 9.923849818149106e-06, "loss": 17.3914, "step": 4594 }, { "epoch": 0.08399290767177875, "grad_norm": 7.694442797149234, "learning_rate": 9.92379834425699e-06, "loss": 17.9646, "step": 4595 }, { "epoch": 0.08401118686822527, "grad_norm": 7.534197946991661, "learning_rate": 9.923746853107389e-06, "loss": 17.7066, "step": 4596 }, { "epoch": 0.08402946606467179, "grad_norm": 7.5512725979457676, "learning_rate": 9.92369534470048e-06, "loss": 17.7111, "step": 4597 }, { "epoch": 0.08404774526111831, "grad_norm": 6.850704097721616, "learning_rate": 9.92364381903645e-06, "loss": 17.7127, "step": 4598 }, { "epoch": 0.08406602445756485, "grad_norm": 7.458877663742378, "learning_rate": 9.923592276115474e-06, "loss": 17.9754, "step": 4599 }, { "epoch": 0.08408430365401137, "grad_norm": 7.719899131278976, "learning_rate": 9.923540715937734e-06, "loss": 17.8802, "step": 4600 }, { "epoch": 0.0841025828504579, "grad_norm": 7.38570962640599, "learning_rate": 9.92348913850341e-06, "loss": 17.7526, "step": 4601 }, { "epoch": 0.08412086204690442, "grad_norm": 7.310302922438322, "learning_rate": 9.923437543812686e-06, "loss": 18.011, "step": 4602 }, { "epoch": 0.08413914124335094, "grad_norm": 7.199501970253766, "learning_rate": 9.92338593186574e-06, "loss": 17.6991, "step": 4603 }, { "epoch": 0.08415742043979747, "grad_norm": 7.660553506333359, "learning_rate": 9.923334302662753e-06, "loss": 18.1197, "step": 4604 }, { "epoch": 0.084175699636244, "grad_norm": 7.4994971748351285, "learning_rate": 9.923282656203906e-06, "loss": 17.9303, "step": 4605 }, { "epoch": 0.08419397883269052, "grad_norm": 7.022139786012747, "learning_rate": 9.92323099248938e-06, "loss": 17.6392, "step": 4606 }, { "epoch": 0.08421225802913704, "grad_norm": 6.842919258651133, "learning_rate": 9.92317931151936e-06, "loss": 17.9257, "step": 4607 }, { "epoch": 0.08423053722558356, "grad_norm": 6.455226973441174, "learning_rate": 9.923127613294021e-06, "loss": 17.3945, "step": 4608 }, { "epoch": 0.08424881642203008, "grad_norm": 7.221120648143459, "learning_rate": 9.923075897813548e-06, "loss": 17.7731, "step": 4609 }, { "epoch": 0.08426709561847662, "grad_norm": 7.568262938606192, "learning_rate": 9.92302416507812e-06, "loss": 18.06, "step": 4610 }, { "epoch": 0.08428537481492314, "grad_norm": 6.058389900149614, "learning_rate": 9.92297241508792e-06, "loss": 17.4748, "step": 4611 }, { "epoch": 0.08430365401136966, "grad_norm": 7.583642015596835, "learning_rate": 9.922920647843128e-06, "loss": 17.8556, "step": 4612 }, { "epoch": 0.08432193320781618, "grad_norm": 6.698944587393052, "learning_rate": 9.922868863343929e-06, "loss": 17.72, "step": 4613 }, { "epoch": 0.0843402124042627, "grad_norm": 7.125979338649392, "learning_rate": 9.922817061590499e-06, "loss": 17.6758, "step": 4614 }, { "epoch": 0.08435849160070923, "grad_norm": 8.228399202299315, "learning_rate": 9.922765242583023e-06, "loss": 18.3639, "step": 4615 }, { "epoch": 0.08437677079715576, "grad_norm": 9.651616808629765, "learning_rate": 9.922713406321684e-06, "loss": 18.5995, "step": 4616 }, { "epoch": 0.08439504999360228, "grad_norm": 6.877989458037605, "learning_rate": 9.92266155280666e-06, "loss": 17.4893, "step": 4617 }, { "epoch": 0.0844133291900488, "grad_norm": 6.939083593763451, "learning_rate": 9.922609682038134e-06, "loss": 17.2929, "step": 4618 }, { "epoch": 0.08443160838649533, "grad_norm": 8.447030077035594, "learning_rate": 9.92255779401629e-06, "loss": 18.0655, "step": 4619 }, { "epoch": 0.08444988758294185, "grad_norm": 7.385349730703015, "learning_rate": 9.922505888741306e-06, "loss": 17.8132, "step": 4620 }, { "epoch": 0.08446816677938838, "grad_norm": 7.611129849504548, "learning_rate": 9.922453966213366e-06, "loss": 17.9683, "step": 4621 }, { "epoch": 0.0844864459758349, "grad_norm": 6.948264508071709, "learning_rate": 9.922402026432653e-06, "loss": 17.9001, "step": 4622 }, { "epoch": 0.08450472517228143, "grad_norm": 7.269876331297846, "learning_rate": 9.922350069399349e-06, "loss": 17.7451, "step": 4623 }, { "epoch": 0.08452300436872795, "grad_norm": 8.361984673894524, "learning_rate": 9.922298095113634e-06, "loss": 17.8335, "step": 4624 }, { "epoch": 0.08454128356517447, "grad_norm": 6.842570747235827, "learning_rate": 9.922246103575692e-06, "loss": 17.7474, "step": 4625 }, { "epoch": 0.084559562761621, "grad_norm": 7.79425463051433, "learning_rate": 9.922194094785704e-06, "loss": 18.3362, "step": 4626 }, { "epoch": 0.08457784195806753, "grad_norm": 7.305390359806515, "learning_rate": 9.922142068743852e-06, "loss": 17.7493, "step": 4627 }, { "epoch": 0.08459612115451405, "grad_norm": 7.481217680342079, "learning_rate": 9.92209002545032e-06, "loss": 17.9787, "step": 4628 }, { "epoch": 0.08461440035096057, "grad_norm": 7.037488075031782, "learning_rate": 9.92203796490529e-06, "loss": 17.7031, "step": 4629 }, { "epoch": 0.0846326795474071, "grad_norm": 7.394315921256108, "learning_rate": 9.921985887108944e-06, "loss": 18.1961, "step": 4630 }, { "epoch": 0.08465095874385362, "grad_norm": 7.456021612583716, "learning_rate": 9.921933792061464e-06, "loss": 17.7568, "step": 4631 }, { "epoch": 0.08466923794030014, "grad_norm": 6.620630947578043, "learning_rate": 9.921881679763033e-06, "loss": 17.6499, "step": 4632 }, { "epoch": 0.08468751713674667, "grad_norm": 6.172522661410363, "learning_rate": 9.921829550213834e-06, "loss": 17.5983, "step": 4633 }, { "epoch": 0.0847057963331932, "grad_norm": 7.4829460278418996, "learning_rate": 9.92177740341405e-06, "loss": 18.0093, "step": 4634 }, { "epoch": 0.08472407552963972, "grad_norm": 6.460635217610971, "learning_rate": 9.921725239363862e-06, "loss": 17.4151, "step": 4635 }, { "epoch": 0.08474235472608624, "grad_norm": 7.598256342533212, "learning_rate": 9.921673058063456e-06, "loss": 17.6448, "step": 4636 }, { "epoch": 0.08476063392253276, "grad_norm": 7.915591943281325, "learning_rate": 9.921620859513012e-06, "loss": 17.9776, "step": 4637 }, { "epoch": 0.0847789131189793, "grad_norm": 6.605683142047098, "learning_rate": 9.921568643712715e-06, "loss": 17.6836, "step": 4638 }, { "epoch": 0.08479719231542582, "grad_norm": 7.004548467830464, "learning_rate": 9.921516410662745e-06, "loss": 17.6931, "step": 4639 }, { "epoch": 0.08481547151187234, "grad_norm": 7.748531232232952, "learning_rate": 9.921464160363288e-06, "loss": 18.0534, "step": 4640 }, { "epoch": 0.08483375070831886, "grad_norm": 6.109900033572359, "learning_rate": 9.921411892814527e-06, "loss": 17.3033, "step": 4641 }, { "epoch": 0.08485202990476538, "grad_norm": 6.663640655959265, "learning_rate": 9.921359608016644e-06, "loss": 17.5133, "step": 4642 }, { "epoch": 0.0848703091012119, "grad_norm": 8.913487294427815, "learning_rate": 9.921307305969822e-06, "loss": 18.4524, "step": 4643 }, { "epoch": 0.08488858829765844, "grad_norm": 8.592237863143653, "learning_rate": 9.921254986674245e-06, "loss": 18.2266, "step": 4644 }, { "epoch": 0.08490686749410496, "grad_norm": 7.136635747172818, "learning_rate": 9.921202650130098e-06, "loss": 17.7482, "step": 4645 }, { "epoch": 0.08492514669055148, "grad_norm": 9.632606101164654, "learning_rate": 9.92115029633756e-06, "loss": 19.086, "step": 4646 }, { "epoch": 0.084943425886998, "grad_norm": 7.265895092754076, "learning_rate": 9.921097925296819e-06, "loss": 17.9276, "step": 4647 }, { "epoch": 0.08496170508344453, "grad_norm": 7.297272946366155, "learning_rate": 9.921045537008057e-06, "loss": 17.635, "step": 4648 }, { "epoch": 0.08497998427989105, "grad_norm": 6.623038852274819, "learning_rate": 9.920993131471456e-06, "loss": 17.5517, "step": 4649 }, { "epoch": 0.08499826347633758, "grad_norm": 7.517246150877132, "learning_rate": 9.920940708687201e-06, "loss": 17.8812, "step": 4650 }, { "epoch": 0.0850165426727841, "grad_norm": 7.6787574631385676, "learning_rate": 9.920888268655477e-06, "loss": 18.1996, "step": 4651 }, { "epoch": 0.08503482186923063, "grad_norm": 9.662814240443105, "learning_rate": 9.920835811376468e-06, "loss": 18.6466, "step": 4652 }, { "epoch": 0.08505310106567715, "grad_norm": 7.901212720931815, "learning_rate": 9.920783336850353e-06, "loss": 18.3602, "step": 4653 }, { "epoch": 0.08507138026212367, "grad_norm": 6.018024588978739, "learning_rate": 9.920730845077323e-06, "loss": 17.4173, "step": 4654 }, { "epoch": 0.08508965945857021, "grad_norm": 6.6539443475311835, "learning_rate": 9.920678336057555e-06, "loss": 17.5248, "step": 4655 }, { "epoch": 0.08510793865501673, "grad_norm": 7.04117038173876, "learning_rate": 9.920625809791237e-06, "loss": 17.7653, "step": 4656 }, { "epoch": 0.08512621785146325, "grad_norm": 6.725694182599704, "learning_rate": 9.920573266278555e-06, "loss": 17.6774, "step": 4657 }, { "epoch": 0.08514449704790977, "grad_norm": 7.5109578583494585, "learning_rate": 9.920520705519689e-06, "loss": 18.0893, "step": 4658 }, { "epoch": 0.0851627762443563, "grad_norm": 8.935053293856278, "learning_rate": 9.920468127514827e-06, "loss": 18.1475, "step": 4659 }, { "epoch": 0.08518105544080282, "grad_norm": 6.195186641273887, "learning_rate": 9.920415532264149e-06, "loss": 17.4453, "step": 4660 }, { "epoch": 0.08519933463724935, "grad_norm": 7.545760255505536, "learning_rate": 9.920362919767841e-06, "loss": 18.1785, "step": 4661 }, { "epoch": 0.08521761383369587, "grad_norm": 6.711386547356334, "learning_rate": 9.92031029002609e-06, "loss": 17.3788, "step": 4662 }, { "epoch": 0.0852358930301424, "grad_norm": 7.660182236696144, "learning_rate": 9.920257643039076e-06, "loss": 18.1192, "step": 4663 }, { "epoch": 0.08525417222658892, "grad_norm": 7.581371210557854, "learning_rate": 9.920204978806987e-06, "loss": 18.0288, "step": 4664 }, { "epoch": 0.08527245142303544, "grad_norm": 6.448097302083825, "learning_rate": 9.920152297330009e-06, "loss": 17.3281, "step": 4665 }, { "epoch": 0.08529073061948196, "grad_norm": 8.203186645850211, "learning_rate": 9.920099598608318e-06, "loss": 18.4436, "step": 4666 }, { "epoch": 0.0853090098159285, "grad_norm": 6.542271610026919, "learning_rate": 9.920046882642111e-06, "loss": 17.41, "step": 4667 }, { "epoch": 0.08532728901237502, "grad_norm": 8.341157410091265, "learning_rate": 9.919994149431564e-06, "loss": 18.218, "step": 4668 }, { "epoch": 0.08534556820882154, "grad_norm": 7.659982388627232, "learning_rate": 9.919941398976864e-06, "loss": 18.1263, "step": 4669 }, { "epoch": 0.08536384740526806, "grad_norm": 7.88822957890354, "learning_rate": 9.919888631278199e-06, "loss": 18.1774, "step": 4670 }, { "epoch": 0.08538212660171458, "grad_norm": 7.882926322531679, "learning_rate": 9.919835846335748e-06, "loss": 18.1639, "step": 4671 }, { "epoch": 0.08540040579816112, "grad_norm": 6.636317951245712, "learning_rate": 9.919783044149701e-06, "loss": 17.3923, "step": 4672 }, { "epoch": 0.08541868499460764, "grad_norm": 6.930030128289028, "learning_rate": 9.919730224720241e-06, "loss": 17.5655, "step": 4673 }, { "epoch": 0.08543696419105416, "grad_norm": 7.387145584405131, "learning_rate": 9.919677388047553e-06, "loss": 18.1185, "step": 4674 }, { "epoch": 0.08545524338750068, "grad_norm": 7.546342043076622, "learning_rate": 9.919624534131824e-06, "loss": 17.9868, "step": 4675 }, { "epoch": 0.0854735225839472, "grad_norm": 6.770470122143234, "learning_rate": 9.919571662973238e-06, "loss": 17.5021, "step": 4676 }, { "epoch": 0.08549180178039373, "grad_norm": 6.804411522980926, "learning_rate": 9.919518774571979e-06, "loss": 17.7389, "step": 4677 }, { "epoch": 0.08551008097684026, "grad_norm": 8.267455277026194, "learning_rate": 9.919465868928234e-06, "loss": 18.3289, "step": 4678 }, { "epoch": 0.08552836017328679, "grad_norm": 8.291586811682983, "learning_rate": 9.919412946042186e-06, "loss": 18.2858, "step": 4679 }, { "epoch": 0.08554663936973331, "grad_norm": 7.4112105963364945, "learning_rate": 9.919360005914026e-06, "loss": 17.9025, "step": 4680 }, { "epoch": 0.08556491856617983, "grad_norm": 6.3738661114120285, "learning_rate": 9.919307048543933e-06, "loss": 17.4316, "step": 4681 }, { "epoch": 0.08558319776262635, "grad_norm": 6.371580144636659, "learning_rate": 9.9192540739321e-06, "loss": 17.38, "step": 4682 }, { "epoch": 0.08560147695907287, "grad_norm": 7.31919105880218, "learning_rate": 9.919201082078703e-06, "loss": 17.9746, "step": 4683 }, { "epoch": 0.08561975615551941, "grad_norm": 8.388661511336576, "learning_rate": 9.919148072983936e-06, "loss": 18.3804, "step": 4684 }, { "epoch": 0.08563803535196593, "grad_norm": 8.085792428675228, "learning_rate": 9.919095046647982e-06, "loss": 18.1638, "step": 4685 }, { "epoch": 0.08565631454841245, "grad_norm": 5.777711832221259, "learning_rate": 9.919042003071026e-06, "loss": 16.9436, "step": 4686 }, { "epoch": 0.08567459374485897, "grad_norm": 6.629114929315756, "learning_rate": 9.918988942253254e-06, "loss": 17.4534, "step": 4687 }, { "epoch": 0.0856928729413055, "grad_norm": 6.559108221099402, "learning_rate": 9.918935864194855e-06, "loss": 17.2541, "step": 4688 }, { "epoch": 0.08571115213775203, "grad_norm": 6.288491885844113, "learning_rate": 9.91888276889601e-06, "loss": 17.3603, "step": 4689 }, { "epoch": 0.08572943133419855, "grad_norm": 7.1051717030267465, "learning_rate": 9.918829656356908e-06, "loss": 17.7719, "step": 4690 }, { "epoch": 0.08574771053064507, "grad_norm": 8.21598696323983, "learning_rate": 9.918776526577735e-06, "loss": 18.1435, "step": 4691 }, { "epoch": 0.0857659897270916, "grad_norm": 6.7548119803895865, "learning_rate": 9.918723379558679e-06, "loss": 17.4835, "step": 4692 }, { "epoch": 0.08578426892353812, "grad_norm": 10.160157657831514, "learning_rate": 9.918670215299924e-06, "loss": 18.7581, "step": 4693 }, { "epoch": 0.08580254811998464, "grad_norm": 6.750081472867717, "learning_rate": 9.918617033801654e-06, "loss": 17.4776, "step": 4694 }, { "epoch": 0.08582082731643118, "grad_norm": 6.571634199227151, "learning_rate": 9.91856383506406e-06, "loss": 17.4038, "step": 4695 }, { "epoch": 0.0858391065128777, "grad_norm": 6.569445575180033, "learning_rate": 9.918510619087327e-06, "loss": 17.3845, "step": 4696 }, { "epoch": 0.08585738570932422, "grad_norm": 8.874425126433062, "learning_rate": 9.91845738587164e-06, "loss": 18.4588, "step": 4697 }, { "epoch": 0.08587566490577074, "grad_norm": 7.072914521496579, "learning_rate": 9.918404135417187e-06, "loss": 17.528, "step": 4698 }, { "epoch": 0.08589394410221726, "grad_norm": 8.364495270293883, "learning_rate": 9.918350867724156e-06, "loss": 18.5498, "step": 4699 }, { "epoch": 0.08591222329866378, "grad_norm": 7.461258301437727, "learning_rate": 9.91829758279273e-06, "loss": 17.431, "step": 4700 }, { "epoch": 0.08593050249511032, "grad_norm": 7.5937863251442375, "learning_rate": 9.9182442806231e-06, "loss": 17.7866, "step": 4701 }, { "epoch": 0.08594878169155684, "grad_norm": 8.314622980150814, "learning_rate": 9.918190961215449e-06, "loss": 18.5394, "step": 4702 }, { "epoch": 0.08596706088800336, "grad_norm": 7.14383782736478, "learning_rate": 9.918137624569964e-06, "loss": 17.626, "step": 4703 }, { "epoch": 0.08598534008444988, "grad_norm": 8.670159331909955, "learning_rate": 9.918084270686836e-06, "loss": 18.3442, "step": 4704 }, { "epoch": 0.0860036192808964, "grad_norm": 6.101264289550609, "learning_rate": 9.918030899566247e-06, "loss": 17.3995, "step": 4705 }, { "epoch": 0.08602189847734294, "grad_norm": 6.313456545793547, "learning_rate": 9.917977511208388e-06, "loss": 17.3159, "step": 4706 }, { "epoch": 0.08604017767378946, "grad_norm": 6.812465386859783, "learning_rate": 9.917924105613444e-06, "loss": 17.5959, "step": 4707 }, { "epoch": 0.08605845687023599, "grad_norm": 7.838779619179673, "learning_rate": 9.917870682781604e-06, "loss": 17.8477, "step": 4708 }, { "epoch": 0.08607673606668251, "grad_norm": 6.076232743193538, "learning_rate": 9.917817242713052e-06, "loss": 17.3116, "step": 4709 }, { "epoch": 0.08609501526312903, "grad_norm": 7.855771540727316, "learning_rate": 9.917763785407979e-06, "loss": 17.781, "step": 4710 }, { "epoch": 0.08611329445957555, "grad_norm": 8.143194125428792, "learning_rate": 9.917710310866571e-06, "loss": 18.1333, "step": 4711 }, { "epoch": 0.08613157365602209, "grad_norm": 7.099837426909703, "learning_rate": 9.917656819089013e-06, "loss": 17.6271, "step": 4712 }, { "epoch": 0.08614985285246861, "grad_norm": 8.042551395431243, "learning_rate": 9.917603310075497e-06, "loss": 18.2618, "step": 4713 }, { "epoch": 0.08616813204891513, "grad_norm": 8.082796459586074, "learning_rate": 9.917549783826205e-06, "loss": 17.8096, "step": 4714 }, { "epoch": 0.08618641124536165, "grad_norm": 6.055973030126549, "learning_rate": 9.917496240341332e-06, "loss": 17.377, "step": 4715 }, { "epoch": 0.08620469044180817, "grad_norm": 7.261066096383178, "learning_rate": 9.917442679621057e-06, "loss": 17.9038, "step": 4716 }, { "epoch": 0.0862229696382547, "grad_norm": 7.210584710222747, "learning_rate": 9.917389101665574e-06, "loss": 17.4521, "step": 4717 }, { "epoch": 0.08624124883470123, "grad_norm": 7.603368845283619, "learning_rate": 9.91733550647507e-06, "loss": 17.5343, "step": 4718 }, { "epoch": 0.08625952803114775, "grad_norm": 6.967903521521943, "learning_rate": 9.91728189404973e-06, "loss": 17.827, "step": 4719 }, { "epoch": 0.08627780722759427, "grad_norm": 7.427687752088161, "learning_rate": 9.917228264389744e-06, "loss": 18.0324, "step": 4720 }, { "epoch": 0.0862960864240408, "grad_norm": 6.799035386057385, "learning_rate": 9.9171746174953e-06, "loss": 17.6637, "step": 4721 }, { "epoch": 0.08631436562048732, "grad_norm": 8.780168374615268, "learning_rate": 9.917120953366585e-06, "loss": 18.5605, "step": 4722 }, { "epoch": 0.08633264481693385, "grad_norm": 7.326438217879566, "learning_rate": 9.917067272003789e-06, "loss": 17.785, "step": 4723 }, { "epoch": 0.08635092401338038, "grad_norm": 6.52323903292793, "learning_rate": 9.9170135734071e-06, "loss": 17.4051, "step": 4724 }, { "epoch": 0.0863692032098269, "grad_norm": 11.463202013983967, "learning_rate": 9.916959857576703e-06, "loss": 19.7018, "step": 4725 }, { "epoch": 0.08638748240627342, "grad_norm": 8.491255105504997, "learning_rate": 9.916906124512789e-06, "loss": 18.0327, "step": 4726 }, { "epoch": 0.08640576160271994, "grad_norm": 6.7005263588979265, "learning_rate": 9.916852374215545e-06, "loss": 17.5082, "step": 4727 }, { "epoch": 0.08642404079916646, "grad_norm": 7.021664812018635, "learning_rate": 9.91679860668516e-06, "loss": 17.7016, "step": 4728 }, { "epoch": 0.086442319995613, "grad_norm": 7.7617505641899225, "learning_rate": 9.916744821921824e-06, "loss": 18.0579, "step": 4729 }, { "epoch": 0.08646059919205952, "grad_norm": 6.873010943974916, "learning_rate": 9.916691019925723e-06, "loss": 17.5972, "step": 4730 }, { "epoch": 0.08647887838850604, "grad_norm": 6.5318766190601645, "learning_rate": 9.916637200697047e-06, "loss": 17.35, "step": 4731 }, { "epoch": 0.08649715758495256, "grad_norm": 6.739330700608552, "learning_rate": 9.916583364235985e-06, "loss": 17.579, "step": 4732 }, { "epoch": 0.08651543678139909, "grad_norm": 7.6456354628620975, "learning_rate": 9.916529510542722e-06, "loss": 18.0121, "step": 4733 }, { "epoch": 0.08653371597784561, "grad_norm": 7.915953905378684, "learning_rate": 9.916475639617454e-06, "loss": 18.1782, "step": 4734 }, { "epoch": 0.08655199517429214, "grad_norm": 7.95960802520631, "learning_rate": 9.916421751460363e-06, "loss": 17.9121, "step": 4735 }, { "epoch": 0.08657027437073866, "grad_norm": 7.2095480650830615, "learning_rate": 9.91636784607164e-06, "loss": 17.8019, "step": 4736 }, { "epoch": 0.08658855356718519, "grad_norm": 7.394382143574761, "learning_rate": 9.916313923451475e-06, "loss": 17.7964, "step": 4737 }, { "epoch": 0.08660683276363171, "grad_norm": 8.569244455072077, "learning_rate": 9.916259983600056e-06, "loss": 18.3305, "step": 4738 }, { "epoch": 0.08662511196007823, "grad_norm": 7.378206640272703, "learning_rate": 9.916206026517572e-06, "loss": 17.888, "step": 4739 }, { "epoch": 0.08664339115652477, "grad_norm": 8.314015635870948, "learning_rate": 9.916152052204215e-06, "loss": 18.3373, "step": 4740 }, { "epoch": 0.08666167035297129, "grad_norm": 5.5408637013582505, "learning_rate": 9.916098060660169e-06, "loss": 17.0137, "step": 4741 }, { "epoch": 0.08667994954941781, "grad_norm": 6.79331002480117, "learning_rate": 9.916044051885627e-06, "loss": 17.6932, "step": 4742 }, { "epoch": 0.08669822874586433, "grad_norm": 7.744255218459719, "learning_rate": 9.915990025880777e-06, "loss": 17.5147, "step": 4743 }, { "epoch": 0.08671650794231085, "grad_norm": 6.530015917386727, "learning_rate": 9.915935982645807e-06, "loss": 17.4595, "step": 4744 }, { "epoch": 0.08673478713875737, "grad_norm": 10.463815575400252, "learning_rate": 9.915881922180911e-06, "loss": 18.8998, "step": 4745 }, { "epoch": 0.08675306633520391, "grad_norm": 7.452022149867731, "learning_rate": 9.915827844486275e-06, "loss": 18.0894, "step": 4746 }, { "epoch": 0.08677134553165043, "grad_norm": 6.207915039399545, "learning_rate": 9.915773749562086e-06, "loss": 17.3329, "step": 4747 }, { "epoch": 0.08678962472809695, "grad_norm": 7.126148411356433, "learning_rate": 9.915719637408538e-06, "loss": 17.8531, "step": 4748 }, { "epoch": 0.08680790392454348, "grad_norm": 8.512808138400677, "learning_rate": 9.91566550802582e-06, "loss": 18.3305, "step": 4749 }, { "epoch": 0.08682618312099, "grad_norm": 7.792977232722275, "learning_rate": 9.91561136141412e-06, "loss": 18.0873, "step": 4750 }, { "epoch": 0.08684446231743652, "grad_norm": 6.565287707543799, "learning_rate": 9.915557197573631e-06, "loss": 17.6469, "step": 4751 }, { "epoch": 0.08686274151388305, "grad_norm": 8.4981894700173, "learning_rate": 9.915503016504539e-06, "loss": 18.4064, "step": 4752 }, { "epoch": 0.08688102071032958, "grad_norm": 6.643736434631383, "learning_rate": 9.915448818207035e-06, "loss": 17.5126, "step": 4753 }, { "epoch": 0.0868992999067761, "grad_norm": 6.366278565981884, "learning_rate": 9.91539460268131e-06, "loss": 17.3545, "step": 4754 }, { "epoch": 0.08691757910322262, "grad_norm": 6.333523566708861, "learning_rate": 9.915340369927553e-06, "loss": 17.4884, "step": 4755 }, { "epoch": 0.08693585829966914, "grad_norm": 7.293604589598897, "learning_rate": 9.915286119945955e-06, "loss": 17.9825, "step": 4756 }, { "epoch": 0.08695413749611568, "grad_norm": 6.0694639902843495, "learning_rate": 9.915231852736707e-06, "loss": 17.5076, "step": 4757 }, { "epoch": 0.0869724166925622, "grad_norm": 6.599357207289291, "learning_rate": 9.915177568299995e-06, "loss": 17.5141, "step": 4758 }, { "epoch": 0.08699069588900872, "grad_norm": 6.2672771878372595, "learning_rate": 9.915123266636013e-06, "loss": 17.5482, "step": 4759 }, { "epoch": 0.08700897508545524, "grad_norm": 7.239215769355867, "learning_rate": 9.915068947744953e-06, "loss": 17.615, "step": 4760 }, { "epoch": 0.08702725428190176, "grad_norm": 7.145135515623931, "learning_rate": 9.915014611627e-06, "loss": 17.8667, "step": 4761 }, { "epoch": 0.08704553347834829, "grad_norm": 7.1987700167538975, "learning_rate": 9.914960258282348e-06, "loss": 17.9123, "step": 4762 }, { "epoch": 0.08706381267479482, "grad_norm": 6.962138340360233, "learning_rate": 9.914905887711187e-06, "loss": 17.4851, "step": 4763 }, { "epoch": 0.08708209187124134, "grad_norm": 6.647227321497944, "learning_rate": 9.914851499913707e-06, "loss": 17.4062, "step": 4764 }, { "epoch": 0.08710037106768787, "grad_norm": 6.563736064089683, "learning_rate": 9.9147970948901e-06, "loss": 17.3302, "step": 4765 }, { "epoch": 0.08711865026413439, "grad_norm": 8.042800811705872, "learning_rate": 9.914742672640554e-06, "loss": 18.0595, "step": 4766 }, { "epoch": 0.08713692946058091, "grad_norm": 7.16015375190771, "learning_rate": 9.914688233165262e-06, "loss": 17.533, "step": 4767 }, { "epoch": 0.08715520865702743, "grad_norm": 8.225380882641392, "learning_rate": 9.914633776464415e-06, "loss": 17.943, "step": 4768 }, { "epoch": 0.08717348785347397, "grad_norm": 7.895018666215797, "learning_rate": 9.914579302538203e-06, "loss": 18.1833, "step": 4769 }, { "epoch": 0.08719176704992049, "grad_norm": 8.523766580365836, "learning_rate": 9.914524811386816e-06, "loss": 18.2362, "step": 4770 }, { "epoch": 0.08721004624636701, "grad_norm": 8.483258717599346, "learning_rate": 9.914470303010447e-06, "loss": 17.8643, "step": 4771 }, { "epoch": 0.08722832544281353, "grad_norm": 7.704944567254081, "learning_rate": 9.914415777409286e-06, "loss": 18.081, "step": 4772 }, { "epoch": 0.08724660463926005, "grad_norm": 7.574746124933421, "learning_rate": 9.914361234583524e-06, "loss": 17.983, "step": 4773 }, { "epoch": 0.08726488383570659, "grad_norm": 6.792623067204275, "learning_rate": 9.914306674533352e-06, "loss": 17.5434, "step": 4774 }, { "epoch": 0.08728316303215311, "grad_norm": 8.007057542549422, "learning_rate": 9.914252097258964e-06, "loss": 18.0533, "step": 4775 }, { "epoch": 0.08730144222859963, "grad_norm": 8.021933047667138, "learning_rate": 9.914197502760545e-06, "loss": 18.1056, "step": 4776 }, { "epoch": 0.08731972142504615, "grad_norm": 7.911876140707684, "learning_rate": 9.914142891038291e-06, "loss": 18.0806, "step": 4777 }, { "epoch": 0.08733800062149268, "grad_norm": 7.287706255735107, "learning_rate": 9.914088262092393e-06, "loss": 17.8173, "step": 4778 }, { "epoch": 0.0873562798179392, "grad_norm": 8.259683811458032, "learning_rate": 9.914033615923044e-06, "loss": 18.1553, "step": 4779 }, { "epoch": 0.08737455901438573, "grad_norm": 8.524263562062561, "learning_rate": 9.913978952530432e-06, "loss": 18.0442, "step": 4780 }, { "epoch": 0.08739283821083225, "grad_norm": 6.98562030134132, "learning_rate": 9.91392427191475e-06, "loss": 17.7846, "step": 4781 }, { "epoch": 0.08741111740727878, "grad_norm": 7.762663006939629, "learning_rate": 9.913869574076189e-06, "loss": 18.0809, "step": 4782 }, { "epoch": 0.0874293966037253, "grad_norm": 7.434293300734314, "learning_rate": 9.913814859014943e-06, "loss": 18.0468, "step": 4783 }, { "epoch": 0.08744767580017182, "grad_norm": 8.033458339692666, "learning_rate": 9.913760126731201e-06, "loss": 18.3616, "step": 4784 }, { "epoch": 0.08746595499661834, "grad_norm": 6.832057399654449, "learning_rate": 9.913705377225157e-06, "loss": 17.6117, "step": 4785 }, { "epoch": 0.08748423419306488, "grad_norm": 6.430135623362784, "learning_rate": 9.913650610497002e-06, "loss": 17.6004, "step": 4786 }, { "epoch": 0.0875025133895114, "grad_norm": 7.710422672894228, "learning_rate": 9.91359582654693e-06, "loss": 17.7769, "step": 4787 }, { "epoch": 0.08752079258595792, "grad_norm": 6.469440017204641, "learning_rate": 9.913541025375128e-06, "loss": 17.4185, "step": 4788 }, { "epoch": 0.08753907178240444, "grad_norm": 6.6826980747643105, "learning_rate": 9.913486206981794e-06, "loss": 17.6902, "step": 4789 }, { "epoch": 0.08755735097885096, "grad_norm": 6.941843842252527, "learning_rate": 9.913431371367115e-06, "loss": 17.7312, "step": 4790 }, { "epoch": 0.0875756301752975, "grad_norm": 7.046209410946652, "learning_rate": 9.913376518531287e-06, "loss": 17.7428, "step": 4791 }, { "epoch": 0.08759390937174402, "grad_norm": 7.9225176278770055, "learning_rate": 9.913321648474499e-06, "loss": 18.0596, "step": 4792 }, { "epoch": 0.08761218856819054, "grad_norm": 8.505779030910574, "learning_rate": 9.913266761196945e-06, "loss": 18.3003, "step": 4793 }, { "epoch": 0.08763046776463707, "grad_norm": 7.72827539043185, "learning_rate": 9.913211856698817e-06, "loss": 18.2406, "step": 4794 }, { "epoch": 0.08764874696108359, "grad_norm": 7.219869508615633, "learning_rate": 9.913156934980309e-06, "loss": 17.7627, "step": 4795 }, { "epoch": 0.08766702615753011, "grad_norm": 8.101370575717883, "learning_rate": 9.913101996041612e-06, "loss": 18.3698, "step": 4796 }, { "epoch": 0.08768530535397664, "grad_norm": 6.325286891409339, "learning_rate": 9.913047039882919e-06, "loss": 17.4737, "step": 4797 }, { "epoch": 0.08770358455042317, "grad_norm": 7.524763115589298, "learning_rate": 9.912992066504422e-06, "loss": 17.5426, "step": 4798 }, { "epoch": 0.08772186374686969, "grad_norm": 8.507126810654984, "learning_rate": 9.912937075906315e-06, "loss": 17.987, "step": 4799 }, { "epoch": 0.08774014294331621, "grad_norm": 9.281446893128436, "learning_rate": 9.91288206808879e-06, "loss": 18.6362, "step": 4800 }, { "epoch": 0.08775842213976273, "grad_norm": 8.169945727922903, "learning_rate": 9.912827043052038e-06, "loss": 18.0307, "step": 4801 }, { "epoch": 0.08777670133620925, "grad_norm": 6.166911678655189, "learning_rate": 9.912772000796253e-06, "loss": 17.4021, "step": 4802 }, { "epoch": 0.08779498053265579, "grad_norm": 7.306370136133016, "learning_rate": 9.912716941321632e-06, "loss": 18.0219, "step": 4803 }, { "epoch": 0.08781325972910231, "grad_norm": 7.204213842910007, "learning_rate": 9.912661864628362e-06, "loss": 17.8936, "step": 4804 }, { "epoch": 0.08783153892554883, "grad_norm": 6.472655632465372, "learning_rate": 9.912606770716638e-06, "loss": 17.6235, "step": 4805 }, { "epoch": 0.08784981812199535, "grad_norm": 8.738770305828906, "learning_rate": 9.912551659586655e-06, "loss": 18.3371, "step": 4806 }, { "epoch": 0.08786809731844188, "grad_norm": 9.395092312423438, "learning_rate": 9.912496531238605e-06, "loss": 18.6876, "step": 4807 }, { "epoch": 0.08788637651488841, "grad_norm": 7.667475452363174, "learning_rate": 9.912441385672679e-06, "loss": 17.7632, "step": 4808 }, { "epoch": 0.08790465571133493, "grad_norm": 6.495863960634322, "learning_rate": 9.912386222889073e-06, "loss": 17.563, "step": 4809 }, { "epoch": 0.08792293490778146, "grad_norm": 7.017018985938606, "learning_rate": 9.91233104288798e-06, "loss": 17.7194, "step": 4810 }, { "epoch": 0.08794121410422798, "grad_norm": 8.260612330153704, "learning_rate": 9.912275845669592e-06, "loss": 17.9905, "step": 4811 }, { "epoch": 0.0879594933006745, "grad_norm": 6.019433249129388, "learning_rate": 9.912220631234105e-06, "loss": 17.2822, "step": 4812 }, { "epoch": 0.08797777249712102, "grad_norm": 7.593604052343888, "learning_rate": 9.91216539958171e-06, "loss": 18.2624, "step": 4813 }, { "epoch": 0.08799605169356756, "grad_norm": 7.076792624049634, "learning_rate": 9.912110150712601e-06, "loss": 17.86, "step": 4814 }, { "epoch": 0.08801433089001408, "grad_norm": 8.710339145917724, "learning_rate": 9.912054884626974e-06, "loss": 17.9205, "step": 4815 }, { "epoch": 0.0880326100864606, "grad_norm": 9.585008062178307, "learning_rate": 9.91199960132502e-06, "loss": 19.4597, "step": 4816 }, { "epoch": 0.08805088928290712, "grad_norm": 5.69442629687945, "learning_rate": 9.911944300806932e-06, "loss": 17.1384, "step": 4817 }, { "epoch": 0.08806916847935364, "grad_norm": 7.750747831978089, "learning_rate": 9.911888983072908e-06, "loss": 18.1424, "step": 4818 }, { "epoch": 0.08808744767580017, "grad_norm": 8.011201559127326, "learning_rate": 9.911833648123139e-06, "loss": 18.2933, "step": 4819 }, { "epoch": 0.0881057268722467, "grad_norm": 8.256293581137744, "learning_rate": 9.911778295957817e-06, "loss": 18.3694, "step": 4820 }, { "epoch": 0.08812400606869322, "grad_norm": 6.96854580648387, "learning_rate": 9.911722926577141e-06, "loss": 17.6918, "step": 4821 }, { "epoch": 0.08814228526513974, "grad_norm": 7.4813761705247, "learning_rate": 9.9116675399813e-06, "loss": 18.2377, "step": 4822 }, { "epoch": 0.08816056446158627, "grad_norm": 6.499835442153083, "learning_rate": 9.911612136170492e-06, "loss": 17.6314, "step": 4823 }, { "epoch": 0.08817884365803279, "grad_norm": 9.329402462013125, "learning_rate": 9.91155671514491e-06, "loss": 17.9491, "step": 4824 }, { "epoch": 0.08819712285447932, "grad_norm": 7.134590714179622, "learning_rate": 9.911501276904746e-06, "loss": 17.8917, "step": 4825 }, { "epoch": 0.08821540205092585, "grad_norm": 6.014950998411903, "learning_rate": 9.911445821450199e-06, "loss": 17.2828, "step": 4826 }, { "epoch": 0.08823368124737237, "grad_norm": 8.36064771060975, "learning_rate": 9.911390348781458e-06, "loss": 18.8198, "step": 4827 }, { "epoch": 0.08825196044381889, "grad_norm": 6.492310583631008, "learning_rate": 9.911334858898721e-06, "loss": 17.488, "step": 4828 }, { "epoch": 0.08827023964026541, "grad_norm": 7.191727143888856, "learning_rate": 9.911279351802182e-06, "loss": 17.7162, "step": 4829 }, { "epoch": 0.08828851883671193, "grad_norm": 7.072271640359286, "learning_rate": 9.911223827492035e-06, "loss": 17.6843, "step": 4830 }, { "epoch": 0.08830679803315847, "grad_norm": 6.8461577876455655, "learning_rate": 9.911168285968474e-06, "loss": 17.5735, "step": 4831 }, { "epoch": 0.08832507722960499, "grad_norm": 8.769748706606327, "learning_rate": 9.911112727231694e-06, "loss": 18.546, "step": 4832 }, { "epoch": 0.08834335642605151, "grad_norm": 6.88971957797013, "learning_rate": 9.911057151281892e-06, "loss": 17.524, "step": 4833 }, { "epoch": 0.08836163562249803, "grad_norm": 8.044215283794703, "learning_rate": 9.911001558119258e-06, "loss": 18.0864, "step": 4834 }, { "epoch": 0.08837991481894455, "grad_norm": 7.21672603923443, "learning_rate": 9.910945947743992e-06, "loss": 17.9637, "step": 4835 }, { "epoch": 0.08839819401539108, "grad_norm": 8.055828634294114, "learning_rate": 9.910890320156285e-06, "loss": 18.0511, "step": 4836 }, { "epoch": 0.08841647321183761, "grad_norm": 7.4949272257948865, "learning_rate": 9.910834675356336e-06, "loss": 18.0165, "step": 4837 }, { "epoch": 0.08843475240828413, "grad_norm": 9.754306941590027, "learning_rate": 9.910779013344336e-06, "loss": 18.4512, "step": 4838 }, { "epoch": 0.08845303160473066, "grad_norm": 7.5429698147869235, "learning_rate": 9.910723334120482e-06, "loss": 17.7982, "step": 4839 }, { "epoch": 0.08847131080117718, "grad_norm": 6.962705737769747, "learning_rate": 9.910667637684968e-06, "loss": 17.6604, "step": 4840 }, { "epoch": 0.0884895899976237, "grad_norm": 6.602786660120906, "learning_rate": 9.91061192403799e-06, "loss": 17.5104, "step": 4841 }, { "epoch": 0.08850786919407024, "grad_norm": 8.19722256545566, "learning_rate": 9.910556193179744e-06, "loss": 18.0938, "step": 4842 }, { "epoch": 0.08852614839051676, "grad_norm": 6.613293074237941, "learning_rate": 9.910500445110424e-06, "loss": 17.3452, "step": 4843 }, { "epoch": 0.08854442758696328, "grad_norm": 7.168804242379337, "learning_rate": 9.910444679830227e-06, "loss": 17.9111, "step": 4844 }, { "epoch": 0.0885627067834098, "grad_norm": 5.794603819679539, "learning_rate": 9.910388897339347e-06, "loss": 17.1512, "step": 4845 }, { "epoch": 0.08858098597985632, "grad_norm": 7.296220367182794, "learning_rate": 9.91033309763798e-06, "loss": 17.3953, "step": 4846 }, { "epoch": 0.08859926517630284, "grad_norm": 6.924173425291186, "learning_rate": 9.910277280726322e-06, "loss": 17.7091, "step": 4847 }, { "epoch": 0.08861754437274938, "grad_norm": 7.9899815344020135, "learning_rate": 9.910221446604569e-06, "loss": 18.2742, "step": 4848 }, { "epoch": 0.0886358235691959, "grad_norm": 7.09281784394237, "learning_rate": 9.910165595272913e-06, "loss": 17.5841, "step": 4849 }, { "epoch": 0.08865410276564242, "grad_norm": 6.455267811002289, "learning_rate": 9.910109726731556e-06, "loss": 17.4705, "step": 4850 }, { "epoch": 0.08867238196208894, "grad_norm": 6.142848707058533, "learning_rate": 9.910053840980688e-06, "loss": 17.2928, "step": 4851 }, { "epoch": 0.08869066115853547, "grad_norm": 7.6149171844813655, "learning_rate": 9.90999793802051e-06, "loss": 17.9281, "step": 4852 }, { "epoch": 0.08870894035498199, "grad_norm": 6.384730092886496, "learning_rate": 9.909942017851212e-06, "loss": 17.5453, "step": 4853 }, { "epoch": 0.08872721955142852, "grad_norm": 6.718959207291093, "learning_rate": 9.909886080472997e-06, "loss": 17.5757, "step": 4854 }, { "epoch": 0.08874549874787505, "grad_norm": 5.89251746724107, "learning_rate": 9.909830125886055e-06, "loss": 17.2406, "step": 4855 }, { "epoch": 0.08876377794432157, "grad_norm": 8.454824001567015, "learning_rate": 9.909774154090584e-06, "loss": 17.9786, "step": 4856 }, { "epoch": 0.08878205714076809, "grad_norm": 6.098006609564664, "learning_rate": 9.909718165086781e-06, "loss": 17.1602, "step": 4857 }, { "epoch": 0.08880033633721461, "grad_norm": 7.064665276083377, "learning_rate": 9.909662158874845e-06, "loss": 17.7347, "step": 4858 }, { "epoch": 0.08881861553366115, "grad_norm": 6.158050025592081, "learning_rate": 9.909606135454965e-06, "loss": 17.4858, "step": 4859 }, { "epoch": 0.08883689473010767, "grad_norm": 7.752886612954595, "learning_rate": 9.909550094827343e-06, "loss": 18.1125, "step": 4860 }, { "epoch": 0.08885517392655419, "grad_norm": 8.195774319177469, "learning_rate": 9.909494036992174e-06, "loss": 18.2586, "step": 4861 }, { "epoch": 0.08887345312300071, "grad_norm": 7.892604259341922, "learning_rate": 9.909437961949655e-06, "loss": 18.0855, "step": 4862 }, { "epoch": 0.08889173231944723, "grad_norm": 7.542061459784922, "learning_rate": 9.909381869699981e-06, "loss": 18.0079, "step": 4863 }, { "epoch": 0.08891001151589376, "grad_norm": 6.491660444918835, "learning_rate": 9.90932576024335e-06, "loss": 17.4933, "step": 4864 }, { "epoch": 0.08892829071234029, "grad_norm": 8.473319499724814, "learning_rate": 9.909269633579959e-06, "loss": 18.123, "step": 4865 }, { "epoch": 0.08894656990878681, "grad_norm": 6.984022328522888, "learning_rate": 9.909213489710002e-06, "loss": 17.9093, "step": 4866 }, { "epoch": 0.08896484910523333, "grad_norm": 7.477287314187048, "learning_rate": 9.909157328633678e-06, "loss": 18.1638, "step": 4867 }, { "epoch": 0.08898312830167986, "grad_norm": 6.4279946010692495, "learning_rate": 9.909101150351186e-06, "loss": 17.2532, "step": 4868 }, { "epoch": 0.08900140749812638, "grad_norm": 6.248805498100486, "learning_rate": 9.909044954862718e-06, "loss": 17.4604, "step": 4869 }, { "epoch": 0.0890196866945729, "grad_norm": 6.761360549226345, "learning_rate": 9.908988742168474e-06, "loss": 17.7246, "step": 4870 }, { "epoch": 0.08903796589101944, "grad_norm": 7.103710072593917, "learning_rate": 9.908932512268652e-06, "loss": 17.7659, "step": 4871 }, { "epoch": 0.08905624508746596, "grad_norm": 8.684011467300552, "learning_rate": 9.908876265163446e-06, "loss": 18.0777, "step": 4872 }, { "epoch": 0.08907452428391248, "grad_norm": 6.801153374397417, "learning_rate": 9.908820000853054e-06, "loss": 17.4871, "step": 4873 }, { "epoch": 0.089092803480359, "grad_norm": 6.996604349004501, "learning_rate": 9.908763719337675e-06, "loss": 17.7669, "step": 4874 }, { "epoch": 0.08911108267680552, "grad_norm": 6.95254617222263, "learning_rate": 9.908707420617505e-06, "loss": 17.7054, "step": 4875 }, { "epoch": 0.08912936187325206, "grad_norm": 5.833851020191169, "learning_rate": 9.908651104692742e-06, "loss": 17.3961, "step": 4876 }, { "epoch": 0.08914764106969858, "grad_norm": 7.028065768944944, "learning_rate": 9.908594771563583e-06, "loss": 17.7726, "step": 4877 }, { "epoch": 0.0891659202661451, "grad_norm": 5.877460860277679, "learning_rate": 9.908538421230224e-06, "loss": 17.1981, "step": 4878 }, { "epoch": 0.08918419946259162, "grad_norm": 8.044787459744423, "learning_rate": 9.908482053692864e-06, "loss": 18.3129, "step": 4879 }, { "epoch": 0.08920247865903815, "grad_norm": 5.537464004149115, "learning_rate": 9.9084256689517e-06, "loss": 16.954, "step": 4880 }, { "epoch": 0.08922075785548467, "grad_norm": 6.421239681199296, "learning_rate": 9.908369267006932e-06, "loss": 17.4468, "step": 4881 }, { "epoch": 0.0892390370519312, "grad_norm": 7.060424910594412, "learning_rate": 9.908312847858753e-06, "loss": 17.726, "step": 4882 }, { "epoch": 0.08925731624837772, "grad_norm": 6.713220527294039, "learning_rate": 9.908256411507363e-06, "loss": 17.8644, "step": 4883 }, { "epoch": 0.08927559544482425, "grad_norm": 7.28506267164789, "learning_rate": 9.908199957952964e-06, "loss": 17.9029, "step": 4884 }, { "epoch": 0.08929387464127077, "grad_norm": 7.159823541978301, "learning_rate": 9.908143487195747e-06, "loss": 17.6019, "step": 4885 }, { "epoch": 0.08931215383771729, "grad_norm": 7.448306011287324, "learning_rate": 9.908086999235914e-06, "loss": 17.7881, "step": 4886 }, { "epoch": 0.08933043303416381, "grad_norm": 7.490854975147979, "learning_rate": 9.908030494073662e-06, "loss": 17.9684, "step": 4887 }, { "epoch": 0.08934871223061035, "grad_norm": 7.010397319852804, "learning_rate": 9.907973971709189e-06, "loss": 17.8058, "step": 4888 }, { "epoch": 0.08936699142705687, "grad_norm": 8.304541211986436, "learning_rate": 9.907917432142693e-06, "loss": 18.5242, "step": 4889 }, { "epoch": 0.08938527062350339, "grad_norm": 6.9716640880108764, "learning_rate": 9.907860875374373e-06, "loss": 17.8519, "step": 4890 }, { "epoch": 0.08940354981994991, "grad_norm": 7.075654539312293, "learning_rate": 9.907804301404424e-06, "loss": 17.5409, "step": 4891 }, { "epoch": 0.08942182901639643, "grad_norm": 7.748924403105278, "learning_rate": 9.907747710233049e-06, "loss": 18.0154, "step": 4892 }, { "epoch": 0.08944010821284297, "grad_norm": 8.908085631203509, "learning_rate": 9.907691101860444e-06, "loss": 18.3611, "step": 4893 }, { "epoch": 0.08945838740928949, "grad_norm": 6.3659490031572465, "learning_rate": 9.907634476286807e-06, "loss": 17.4746, "step": 4894 }, { "epoch": 0.08947666660573601, "grad_norm": 6.763138824222327, "learning_rate": 9.907577833512338e-06, "loss": 17.5551, "step": 4895 }, { "epoch": 0.08949494580218254, "grad_norm": 6.652661034895993, "learning_rate": 9.907521173537234e-06, "loss": 17.5258, "step": 4896 }, { "epoch": 0.08951322499862906, "grad_norm": 6.924303028863853, "learning_rate": 9.907464496361694e-06, "loss": 17.5145, "step": 4897 }, { "epoch": 0.08953150419507558, "grad_norm": 7.8742076124462805, "learning_rate": 9.907407801985916e-06, "loss": 17.8574, "step": 4898 }, { "epoch": 0.08954978339152211, "grad_norm": 7.02700500083737, "learning_rate": 9.9073510904101e-06, "loss": 17.9066, "step": 4899 }, { "epoch": 0.08956806258796864, "grad_norm": 8.325333538927772, "learning_rate": 9.907294361634445e-06, "loss": 18.1865, "step": 4900 }, { "epoch": 0.08958634178441516, "grad_norm": 7.366991577966159, "learning_rate": 9.907237615659149e-06, "loss": 17.8358, "step": 4901 }, { "epoch": 0.08960462098086168, "grad_norm": 7.4898641121745815, "learning_rate": 9.90718085248441e-06, "loss": 18.0713, "step": 4902 }, { "epoch": 0.0896229001773082, "grad_norm": 8.208024418378578, "learning_rate": 9.907124072110428e-06, "loss": 18.3265, "step": 4903 }, { "epoch": 0.08964117937375472, "grad_norm": 8.606868574897145, "learning_rate": 9.907067274537404e-06, "loss": 18.0227, "step": 4904 }, { "epoch": 0.08965945857020126, "grad_norm": 6.479156336338076, "learning_rate": 9.907010459765534e-06, "loss": 17.5345, "step": 4905 }, { "epoch": 0.08967773776664778, "grad_norm": 7.722528397467274, "learning_rate": 9.906953627795018e-06, "loss": 17.8959, "step": 4906 }, { "epoch": 0.0896960169630943, "grad_norm": 7.752109692661014, "learning_rate": 9.906896778626054e-06, "loss": 17.8554, "step": 4907 }, { "epoch": 0.08971429615954082, "grad_norm": 6.47635829372542, "learning_rate": 9.906839912258843e-06, "loss": 17.3513, "step": 4908 }, { "epoch": 0.08973257535598735, "grad_norm": 7.908803958084195, "learning_rate": 9.906783028693585e-06, "loss": 18.1248, "step": 4909 }, { "epoch": 0.08975085455243388, "grad_norm": 7.819372055678718, "learning_rate": 9.906726127930477e-06, "loss": 18.1958, "step": 4910 }, { "epoch": 0.0897691337488804, "grad_norm": 7.131489980549072, "learning_rate": 9.906669209969721e-06, "loss": 17.9138, "step": 4911 }, { "epoch": 0.08978741294532692, "grad_norm": 9.451767695330918, "learning_rate": 9.906612274811516e-06, "loss": 18.75, "step": 4912 }, { "epoch": 0.08980569214177345, "grad_norm": 8.053565778914212, "learning_rate": 9.906555322456059e-06, "loss": 18.0745, "step": 4913 }, { "epoch": 0.08982397133821997, "grad_norm": 5.741438465348301, "learning_rate": 9.90649835290355e-06, "loss": 17.3908, "step": 4914 }, { "epoch": 0.08984225053466649, "grad_norm": 7.753535854981617, "learning_rate": 9.906441366154194e-06, "loss": 18.1406, "step": 4915 }, { "epoch": 0.08986052973111303, "grad_norm": 7.358112511580242, "learning_rate": 9.906384362208183e-06, "loss": 17.8397, "step": 4916 }, { "epoch": 0.08987880892755955, "grad_norm": 7.459473289924685, "learning_rate": 9.906327341065722e-06, "loss": 18.116, "step": 4917 }, { "epoch": 0.08989708812400607, "grad_norm": 5.350762216150077, "learning_rate": 9.90627030272701e-06, "loss": 17.2346, "step": 4918 }, { "epoch": 0.08991536732045259, "grad_norm": 6.29069708317689, "learning_rate": 9.906213247192246e-06, "loss": 17.4428, "step": 4919 }, { "epoch": 0.08993364651689911, "grad_norm": 7.490105325337597, "learning_rate": 9.906156174461631e-06, "loss": 18.0593, "step": 4920 }, { "epoch": 0.08995192571334563, "grad_norm": 7.813982124712491, "learning_rate": 9.906099084535362e-06, "loss": 17.9516, "step": 4921 }, { "epoch": 0.08997020490979217, "grad_norm": 5.685757307791417, "learning_rate": 9.906041977413646e-06, "loss": 17.2931, "step": 4922 }, { "epoch": 0.08998848410623869, "grad_norm": 8.182059085116599, "learning_rate": 9.905984853096676e-06, "loss": 18.2598, "step": 4923 }, { "epoch": 0.09000676330268521, "grad_norm": 7.562270683203775, "learning_rate": 9.905927711584652e-06, "loss": 17.8086, "step": 4924 }, { "epoch": 0.09002504249913174, "grad_norm": 7.620203216056942, "learning_rate": 9.90587055287778e-06, "loss": 17.9578, "step": 4925 }, { "epoch": 0.09004332169557826, "grad_norm": 7.301869554789333, "learning_rate": 9.905813376976257e-06, "loss": 17.7887, "step": 4926 }, { "epoch": 0.09006160089202479, "grad_norm": 5.655472240721557, "learning_rate": 9.905756183880284e-06, "loss": 17.0582, "step": 4927 }, { "epoch": 0.09007988008847131, "grad_norm": 6.698368435760824, "learning_rate": 9.905698973590061e-06, "loss": 17.7889, "step": 4928 }, { "epoch": 0.09009815928491784, "grad_norm": 9.527879894553903, "learning_rate": 9.90564174610579e-06, "loss": 18.563, "step": 4929 }, { "epoch": 0.09011643848136436, "grad_norm": 7.603525625587197, "learning_rate": 9.905584501427669e-06, "loss": 17.9311, "step": 4930 }, { "epoch": 0.09013471767781088, "grad_norm": 8.398402960489525, "learning_rate": 9.905527239555899e-06, "loss": 18.5133, "step": 4931 }, { "epoch": 0.0901529968742574, "grad_norm": 7.928934032367986, "learning_rate": 9.905469960490683e-06, "loss": 18.0871, "step": 4932 }, { "epoch": 0.09017127607070394, "grad_norm": 8.12174092784868, "learning_rate": 9.905412664232222e-06, "loss": 18.1964, "step": 4933 }, { "epoch": 0.09018955526715046, "grad_norm": 6.392108989691651, "learning_rate": 9.905355350780713e-06, "loss": 17.1349, "step": 4934 }, { "epoch": 0.09020783446359698, "grad_norm": 6.851478822719325, "learning_rate": 9.90529802013636e-06, "loss": 17.6792, "step": 4935 }, { "epoch": 0.0902261136600435, "grad_norm": 6.760071373340467, "learning_rate": 9.905240672299363e-06, "loss": 17.478, "step": 4936 }, { "epoch": 0.09024439285649002, "grad_norm": 8.275567365192778, "learning_rate": 9.905183307269922e-06, "loss": 18.2972, "step": 4937 }, { "epoch": 0.09026267205293655, "grad_norm": 6.7658162244016, "learning_rate": 9.90512592504824e-06, "loss": 17.516, "step": 4938 }, { "epoch": 0.09028095124938308, "grad_norm": 6.968360606268438, "learning_rate": 9.905068525634519e-06, "loss": 17.7538, "step": 4939 }, { "epoch": 0.0902992304458296, "grad_norm": 6.082148817294284, "learning_rate": 9.905011109028957e-06, "loss": 17.4172, "step": 4940 }, { "epoch": 0.09031750964227613, "grad_norm": 7.345070877970718, "learning_rate": 9.904953675231757e-06, "loss": 17.7168, "step": 4941 }, { "epoch": 0.09033578883872265, "grad_norm": 7.134130876369556, "learning_rate": 9.90489622424312e-06, "loss": 17.6517, "step": 4942 }, { "epoch": 0.09035406803516917, "grad_norm": 7.403768346109211, "learning_rate": 9.904838756063246e-06, "loss": 18.0293, "step": 4943 }, { "epoch": 0.0903723472316157, "grad_norm": 7.579206572533839, "learning_rate": 9.904781270692338e-06, "loss": 17.9277, "step": 4944 }, { "epoch": 0.09039062642806223, "grad_norm": 7.01991912954351, "learning_rate": 9.904723768130598e-06, "loss": 17.7241, "step": 4945 }, { "epoch": 0.09040890562450875, "grad_norm": 8.3797845230764, "learning_rate": 9.904666248378228e-06, "loss": 18.167, "step": 4946 }, { "epoch": 0.09042718482095527, "grad_norm": 9.131412748192803, "learning_rate": 9.904608711435426e-06, "loss": 18.1724, "step": 4947 }, { "epoch": 0.09044546401740179, "grad_norm": 7.08883566417059, "learning_rate": 9.904551157302398e-06, "loss": 17.7404, "step": 4948 }, { "epoch": 0.09046374321384831, "grad_norm": 7.861559932400002, "learning_rate": 9.904493585979343e-06, "loss": 17.9231, "step": 4949 }, { "epoch": 0.09048202241029485, "grad_norm": 6.353152324499593, "learning_rate": 9.904435997466463e-06, "loss": 17.2822, "step": 4950 }, { "epoch": 0.09050030160674137, "grad_norm": 7.584299532570088, "learning_rate": 9.904378391763962e-06, "loss": 17.9455, "step": 4951 }, { "epoch": 0.09051858080318789, "grad_norm": 6.9418694944928445, "learning_rate": 9.90432076887204e-06, "loss": 17.6901, "step": 4952 }, { "epoch": 0.09053685999963441, "grad_norm": 8.779481141723359, "learning_rate": 9.904263128790899e-06, "loss": 18.6719, "step": 4953 }, { "epoch": 0.09055513919608094, "grad_norm": 7.6466083260157145, "learning_rate": 9.90420547152074e-06, "loss": 17.713, "step": 4954 }, { "epoch": 0.09057341839252746, "grad_norm": 6.944497802117786, "learning_rate": 9.904147797061767e-06, "loss": 17.5404, "step": 4955 }, { "epoch": 0.090591697588974, "grad_norm": 6.788706594050242, "learning_rate": 9.904090105414184e-06, "loss": 17.6658, "step": 4956 }, { "epoch": 0.09060997678542052, "grad_norm": 6.270923613989111, "learning_rate": 9.904032396578188e-06, "loss": 17.4603, "step": 4957 }, { "epoch": 0.09062825598186704, "grad_norm": 6.404011945641866, "learning_rate": 9.903974670553984e-06, "loss": 17.3798, "step": 4958 }, { "epoch": 0.09064653517831356, "grad_norm": 7.196345513098332, "learning_rate": 9.903916927341776e-06, "loss": 17.7213, "step": 4959 }, { "epoch": 0.09066481437476008, "grad_norm": 8.873870347748749, "learning_rate": 9.903859166941762e-06, "loss": 18.2185, "step": 4960 }, { "epoch": 0.09068309357120662, "grad_norm": 7.8470185747211065, "learning_rate": 9.90380138935415e-06, "loss": 17.9964, "step": 4961 }, { "epoch": 0.09070137276765314, "grad_norm": 8.355801206994641, "learning_rate": 9.903743594579139e-06, "loss": 18.1677, "step": 4962 }, { "epoch": 0.09071965196409966, "grad_norm": 6.500662505566888, "learning_rate": 9.903685782616932e-06, "loss": 17.3051, "step": 4963 }, { "epoch": 0.09073793116054618, "grad_norm": 7.8126043232462985, "learning_rate": 9.903627953467731e-06, "loss": 18.1204, "step": 4964 }, { "epoch": 0.0907562103569927, "grad_norm": 7.284690556818036, "learning_rate": 9.90357010713174e-06, "loss": 18.1257, "step": 4965 }, { "epoch": 0.09077448955343922, "grad_norm": 8.89816870388913, "learning_rate": 9.903512243609161e-06, "loss": 18.6274, "step": 4966 }, { "epoch": 0.09079276874988576, "grad_norm": 9.319042947402458, "learning_rate": 9.903454362900197e-06, "loss": 18.1239, "step": 4967 }, { "epoch": 0.09081104794633228, "grad_norm": 8.13528093699929, "learning_rate": 9.903396465005054e-06, "loss": 18.4412, "step": 4968 }, { "epoch": 0.0908293271427788, "grad_norm": 8.30774638527725, "learning_rate": 9.903338549923928e-06, "loss": 18.2453, "step": 4969 }, { "epoch": 0.09084760633922533, "grad_norm": 7.597627094489424, "learning_rate": 9.903280617657027e-06, "loss": 18.0494, "step": 4970 }, { "epoch": 0.09086588553567185, "grad_norm": 6.0707407297946645, "learning_rate": 9.903222668204553e-06, "loss": 17.3874, "step": 4971 }, { "epoch": 0.09088416473211837, "grad_norm": 6.115617491789437, "learning_rate": 9.90316470156671e-06, "loss": 17.3557, "step": 4972 }, { "epoch": 0.0909024439285649, "grad_norm": 8.01193067033986, "learning_rate": 9.9031067177437e-06, "loss": 18.2971, "step": 4973 }, { "epoch": 0.09092072312501143, "grad_norm": 8.204679748834103, "learning_rate": 9.903048716735725e-06, "loss": 17.9572, "step": 4974 }, { "epoch": 0.09093900232145795, "grad_norm": 7.497913081293649, "learning_rate": 9.902990698542992e-06, "loss": 17.9283, "step": 4975 }, { "epoch": 0.09095728151790447, "grad_norm": 7.121495298556463, "learning_rate": 9.9029326631657e-06, "loss": 17.752, "step": 4976 }, { "epoch": 0.09097556071435099, "grad_norm": 7.31486842562587, "learning_rate": 9.902874610604054e-06, "loss": 17.6785, "step": 4977 }, { "epoch": 0.09099383991079753, "grad_norm": 9.291313903532426, "learning_rate": 9.90281654085826e-06, "loss": 18.8064, "step": 4978 }, { "epoch": 0.09101211910724405, "grad_norm": 9.22651887095488, "learning_rate": 9.902758453928519e-06, "loss": 18.6168, "step": 4979 }, { "epoch": 0.09103039830369057, "grad_norm": 7.6036357163146695, "learning_rate": 9.902700349815035e-06, "loss": 17.8308, "step": 4980 }, { "epoch": 0.09104867750013709, "grad_norm": 8.618788747025125, "learning_rate": 9.90264222851801e-06, "loss": 18.4711, "step": 4981 }, { "epoch": 0.09106695669658361, "grad_norm": 6.951221154408608, "learning_rate": 9.902584090037651e-06, "loss": 17.8209, "step": 4982 }, { "epoch": 0.09108523589303014, "grad_norm": 7.433568318100594, "learning_rate": 9.90252593437416e-06, "loss": 17.7275, "step": 4983 }, { "epoch": 0.09110351508947667, "grad_norm": 8.384901533021308, "learning_rate": 9.902467761527741e-06, "loss": 18.0449, "step": 4984 }, { "epoch": 0.0911217942859232, "grad_norm": 9.209068538222029, "learning_rate": 9.902409571498598e-06, "loss": 18.3365, "step": 4985 }, { "epoch": 0.09114007348236972, "grad_norm": 8.04366712785056, "learning_rate": 9.902351364286935e-06, "loss": 18.1736, "step": 4986 }, { "epoch": 0.09115835267881624, "grad_norm": 7.321603522303677, "learning_rate": 9.902293139892956e-06, "loss": 17.9776, "step": 4987 }, { "epoch": 0.09117663187526276, "grad_norm": 8.305108390363309, "learning_rate": 9.902234898316863e-06, "loss": 18.4951, "step": 4988 }, { "epoch": 0.09119491107170928, "grad_norm": 8.580880097757682, "learning_rate": 9.902176639558865e-06, "loss": 18.4488, "step": 4989 }, { "epoch": 0.09121319026815582, "grad_norm": 7.17768760344289, "learning_rate": 9.902118363619163e-06, "loss": 17.8056, "step": 4990 }, { "epoch": 0.09123146946460234, "grad_norm": 7.56579698012164, "learning_rate": 9.902060070497958e-06, "loss": 17.8564, "step": 4991 }, { "epoch": 0.09124974866104886, "grad_norm": 6.686694426761798, "learning_rate": 9.90200176019546e-06, "loss": 17.4764, "step": 4992 }, { "epoch": 0.09126802785749538, "grad_norm": 7.136554403411247, "learning_rate": 9.901943432711872e-06, "loss": 17.6427, "step": 4993 }, { "epoch": 0.0912863070539419, "grad_norm": 6.136446274441091, "learning_rate": 9.901885088047398e-06, "loss": 17.3511, "step": 4994 }, { "epoch": 0.09130458625038844, "grad_norm": 6.57630498772527, "learning_rate": 9.901826726202242e-06, "loss": 17.8012, "step": 4995 }, { "epoch": 0.09132286544683496, "grad_norm": 7.830314180937311, "learning_rate": 9.901768347176607e-06, "loss": 17.9419, "step": 4996 }, { "epoch": 0.09134114464328148, "grad_norm": 6.2905804893550705, "learning_rate": 9.9017099509707e-06, "loss": 17.2592, "step": 4997 }, { "epoch": 0.091359423839728, "grad_norm": 7.436038988566306, "learning_rate": 9.901651537584725e-06, "loss": 17.9945, "step": 4998 }, { "epoch": 0.09137770303617453, "grad_norm": 7.705229230160873, "learning_rate": 9.901593107018887e-06, "loss": 18.1507, "step": 4999 }, { "epoch": 0.09139598223262105, "grad_norm": 6.445126525326775, "learning_rate": 9.901534659273391e-06, "loss": 17.2738, "step": 5000 }, { "epoch": 0.09141426142906758, "grad_norm": 8.461811534463543, "learning_rate": 9.901476194348441e-06, "loss": 18.2201, "step": 5001 }, { "epoch": 0.0914325406255141, "grad_norm": 6.924860783049049, "learning_rate": 9.901417712244242e-06, "loss": 17.7411, "step": 5002 }, { "epoch": 0.09145081982196063, "grad_norm": 5.649922596212064, "learning_rate": 9.901359212960999e-06, "loss": 17.3142, "step": 5003 }, { "epoch": 0.09146909901840715, "grad_norm": 6.6697417774244485, "learning_rate": 9.901300696498917e-06, "loss": 17.4254, "step": 5004 }, { "epoch": 0.09148737821485367, "grad_norm": 6.3284775144210075, "learning_rate": 9.901242162858202e-06, "loss": 17.2525, "step": 5005 }, { "epoch": 0.09150565741130019, "grad_norm": 7.828468842486411, "learning_rate": 9.901183612039058e-06, "loss": 17.6558, "step": 5006 }, { "epoch": 0.09152393660774673, "grad_norm": 6.945227607333281, "learning_rate": 9.901125044041692e-06, "loss": 17.6458, "step": 5007 }, { "epoch": 0.09154221580419325, "grad_norm": 8.33917572690878, "learning_rate": 9.901066458866306e-06, "loss": 18.0863, "step": 5008 }, { "epoch": 0.09156049500063977, "grad_norm": 8.694129911562369, "learning_rate": 9.901007856513109e-06, "loss": 18.4328, "step": 5009 }, { "epoch": 0.0915787741970863, "grad_norm": 7.191391130756593, "learning_rate": 9.900949236982305e-06, "loss": 17.7385, "step": 5010 }, { "epoch": 0.09159705339353282, "grad_norm": 7.075946211949113, "learning_rate": 9.900890600274097e-06, "loss": 17.6208, "step": 5011 }, { "epoch": 0.09161533258997935, "grad_norm": 7.265462122920251, "learning_rate": 9.900831946388696e-06, "loss": 17.6656, "step": 5012 }, { "epoch": 0.09163361178642587, "grad_norm": 7.713863902687523, "learning_rate": 9.900773275326302e-06, "loss": 17.8481, "step": 5013 }, { "epoch": 0.0916518909828724, "grad_norm": 7.118312489912055, "learning_rate": 9.900714587087123e-06, "loss": 17.8214, "step": 5014 }, { "epoch": 0.09167017017931892, "grad_norm": 8.160408961123743, "learning_rate": 9.900655881671365e-06, "loss": 18.3873, "step": 5015 }, { "epoch": 0.09168844937576544, "grad_norm": 7.1922571340580905, "learning_rate": 9.900597159079235e-06, "loss": 17.6674, "step": 5016 }, { "epoch": 0.09170672857221196, "grad_norm": 6.552480056514527, "learning_rate": 9.900538419310935e-06, "loss": 17.5884, "step": 5017 }, { "epoch": 0.0917250077686585, "grad_norm": 7.554329320381714, "learning_rate": 9.900479662366673e-06, "loss": 18.0221, "step": 5018 }, { "epoch": 0.09174328696510502, "grad_norm": 7.2563650475563515, "learning_rate": 9.900420888246657e-06, "loss": 17.7067, "step": 5019 }, { "epoch": 0.09176156616155154, "grad_norm": 7.587595855844159, "learning_rate": 9.90036209695109e-06, "loss": 17.8569, "step": 5020 }, { "epoch": 0.09177984535799806, "grad_norm": 7.588997794433961, "learning_rate": 9.900303288480178e-06, "loss": 17.9387, "step": 5021 }, { "epoch": 0.09179812455444458, "grad_norm": 7.94984395924185, "learning_rate": 9.90024446283413e-06, "loss": 18.1619, "step": 5022 }, { "epoch": 0.0918164037508911, "grad_norm": 7.614288176046099, "learning_rate": 9.90018562001315e-06, "loss": 17.8588, "step": 5023 }, { "epoch": 0.09183468294733764, "grad_norm": 7.117966262212396, "learning_rate": 9.900126760017444e-06, "loss": 17.8255, "step": 5024 }, { "epoch": 0.09185296214378416, "grad_norm": 7.546946708664759, "learning_rate": 9.900067882847218e-06, "loss": 17.7908, "step": 5025 }, { "epoch": 0.09187124134023068, "grad_norm": 6.669129871761577, "learning_rate": 9.900008988502681e-06, "loss": 17.6401, "step": 5026 }, { "epoch": 0.0918895205366772, "grad_norm": 7.45925356671338, "learning_rate": 9.899950076984038e-06, "loss": 17.9925, "step": 5027 }, { "epoch": 0.09190779973312373, "grad_norm": 7.081006160114588, "learning_rate": 9.899891148291493e-06, "loss": 17.8191, "step": 5028 }, { "epoch": 0.09192607892957026, "grad_norm": 8.581660837824266, "learning_rate": 9.899832202425256e-06, "loss": 18.2224, "step": 5029 }, { "epoch": 0.09194435812601678, "grad_norm": 7.0700686828812405, "learning_rate": 9.899773239385533e-06, "loss": 17.6403, "step": 5030 }, { "epoch": 0.0919626373224633, "grad_norm": 8.272419729877685, "learning_rate": 9.899714259172528e-06, "loss": 18.4661, "step": 5031 }, { "epoch": 0.09198091651890983, "grad_norm": 6.328804256362053, "learning_rate": 9.899655261786452e-06, "loss": 17.404, "step": 5032 }, { "epoch": 0.09199919571535635, "grad_norm": 6.251582485630017, "learning_rate": 9.899596247227508e-06, "loss": 16.9598, "step": 5033 }, { "epoch": 0.09201747491180287, "grad_norm": 7.446638770898681, "learning_rate": 9.899537215495905e-06, "loss": 18.0199, "step": 5034 }, { "epoch": 0.0920357541082494, "grad_norm": 6.4283726762518025, "learning_rate": 9.899478166591849e-06, "loss": 17.5608, "step": 5035 }, { "epoch": 0.09205403330469593, "grad_norm": 6.0300426778732374, "learning_rate": 9.899419100515547e-06, "loss": 17.3951, "step": 5036 }, { "epoch": 0.09207231250114245, "grad_norm": 8.184646622407854, "learning_rate": 9.899360017267205e-06, "loss": 17.9212, "step": 5037 }, { "epoch": 0.09209059169758897, "grad_norm": 6.638060436107272, "learning_rate": 9.899300916847034e-06, "loss": 17.4981, "step": 5038 }, { "epoch": 0.0921088708940355, "grad_norm": 5.517966476871986, "learning_rate": 9.899241799255236e-06, "loss": 17.0434, "step": 5039 }, { "epoch": 0.09212715009048202, "grad_norm": 6.673691804201888, "learning_rate": 9.899182664492022e-06, "loss": 17.409, "step": 5040 }, { "epoch": 0.09214542928692855, "grad_norm": 6.796683992349393, "learning_rate": 9.899123512557598e-06, "loss": 17.5109, "step": 5041 }, { "epoch": 0.09216370848337507, "grad_norm": 8.356684608781904, "learning_rate": 9.899064343452171e-06, "loss": 18.1871, "step": 5042 }, { "epoch": 0.0921819876798216, "grad_norm": 6.329480264267073, "learning_rate": 9.899005157175949e-06, "loss": 17.3431, "step": 5043 }, { "epoch": 0.09220026687626812, "grad_norm": 6.529968145350565, "learning_rate": 9.898945953729138e-06, "loss": 17.5718, "step": 5044 }, { "epoch": 0.09221854607271464, "grad_norm": 6.9738124871435465, "learning_rate": 9.898886733111948e-06, "loss": 17.7357, "step": 5045 }, { "epoch": 0.09223682526916117, "grad_norm": 7.533163221333678, "learning_rate": 9.898827495324582e-06, "loss": 17.8633, "step": 5046 }, { "epoch": 0.0922551044656077, "grad_norm": 7.957121859668378, "learning_rate": 9.898768240367254e-06, "loss": 17.8633, "step": 5047 }, { "epoch": 0.09227338366205422, "grad_norm": 7.630567599799659, "learning_rate": 9.898708968240168e-06, "loss": 17.6692, "step": 5048 }, { "epoch": 0.09229166285850074, "grad_norm": 7.732342182942792, "learning_rate": 9.89864967894353e-06, "loss": 17.956, "step": 5049 }, { "epoch": 0.09230994205494726, "grad_norm": 9.629260157681703, "learning_rate": 9.898590372477553e-06, "loss": 18.385, "step": 5050 }, { "epoch": 0.09232822125139378, "grad_norm": 6.843438829330014, "learning_rate": 9.898531048842439e-06, "loss": 17.8654, "step": 5051 }, { "epoch": 0.09234650044784032, "grad_norm": 6.593065429758437, "learning_rate": 9.898471708038399e-06, "loss": 17.6917, "step": 5052 }, { "epoch": 0.09236477964428684, "grad_norm": 7.098351330255455, "learning_rate": 9.898412350065643e-06, "loss": 17.6001, "step": 5053 }, { "epoch": 0.09238305884073336, "grad_norm": 6.341244681315394, "learning_rate": 9.898352974924375e-06, "loss": 17.5316, "step": 5054 }, { "epoch": 0.09240133803717988, "grad_norm": 7.729340030369656, "learning_rate": 9.898293582614807e-06, "loss": 18.3122, "step": 5055 }, { "epoch": 0.0924196172336264, "grad_norm": 7.956024790318432, "learning_rate": 9.898234173137142e-06, "loss": 17.9008, "step": 5056 }, { "epoch": 0.09243789643007293, "grad_norm": 5.546061194674989, "learning_rate": 9.898174746491593e-06, "loss": 17.1677, "step": 5057 }, { "epoch": 0.09245617562651946, "grad_norm": 6.732295760779667, "learning_rate": 9.898115302678366e-06, "loss": 17.6001, "step": 5058 }, { "epoch": 0.09247445482296598, "grad_norm": 6.84673457871143, "learning_rate": 9.898055841697671e-06, "loss": 17.5328, "step": 5059 }, { "epoch": 0.0924927340194125, "grad_norm": 7.606882429479325, "learning_rate": 9.897996363549714e-06, "loss": 18.1756, "step": 5060 }, { "epoch": 0.09251101321585903, "grad_norm": 6.977198794202178, "learning_rate": 9.897936868234706e-06, "loss": 18.0095, "step": 5061 }, { "epoch": 0.09252929241230555, "grad_norm": 9.701642926743286, "learning_rate": 9.897877355752851e-06, "loss": 18.3897, "step": 5062 }, { "epoch": 0.09254757160875209, "grad_norm": 6.549979390468102, "learning_rate": 9.897817826104364e-06, "loss": 17.5967, "step": 5063 }, { "epoch": 0.09256585080519861, "grad_norm": 8.463070939744693, "learning_rate": 9.897758279289452e-06, "loss": 18.212, "step": 5064 }, { "epoch": 0.09258413000164513, "grad_norm": 10.159200115442955, "learning_rate": 9.897698715308319e-06, "loss": 18.6003, "step": 5065 }, { "epoch": 0.09260240919809165, "grad_norm": 7.8889470110630215, "learning_rate": 9.897639134161177e-06, "loss": 18.4059, "step": 5066 }, { "epoch": 0.09262068839453817, "grad_norm": 5.849307190283272, "learning_rate": 9.897579535848236e-06, "loss": 17.1294, "step": 5067 }, { "epoch": 0.0926389675909847, "grad_norm": 7.288718858669303, "learning_rate": 9.897519920369705e-06, "loss": 17.8934, "step": 5068 }, { "epoch": 0.09265724678743123, "grad_norm": 6.839732780830549, "learning_rate": 9.89746028772579e-06, "loss": 17.7358, "step": 5069 }, { "epoch": 0.09267552598387775, "grad_norm": 7.413594438277912, "learning_rate": 9.897400637916702e-06, "loss": 18.1762, "step": 5070 }, { "epoch": 0.09269380518032427, "grad_norm": 7.062197968061983, "learning_rate": 9.89734097094265e-06, "loss": 17.5506, "step": 5071 }, { "epoch": 0.0927120843767708, "grad_norm": 7.603135725863668, "learning_rate": 9.897281286803842e-06, "loss": 17.9349, "step": 5072 }, { "epoch": 0.09273036357321732, "grad_norm": 8.952409962326763, "learning_rate": 9.89722158550049e-06, "loss": 18.6004, "step": 5073 }, { "epoch": 0.09274864276966384, "grad_norm": 6.36242751579794, "learning_rate": 9.897161867032799e-06, "loss": 17.4027, "step": 5074 }, { "epoch": 0.09276692196611037, "grad_norm": 7.772562341085767, "learning_rate": 9.897102131400981e-06, "loss": 18.0767, "step": 5075 }, { "epoch": 0.0927852011625569, "grad_norm": 6.650751263867157, "learning_rate": 9.897042378605245e-06, "loss": 17.1936, "step": 5076 }, { "epoch": 0.09280348035900342, "grad_norm": 7.866651835631159, "learning_rate": 9.896982608645802e-06, "loss": 18.0655, "step": 5077 }, { "epoch": 0.09282175955544994, "grad_norm": 7.146689741848216, "learning_rate": 9.896922821522858e-06, "loss": 17.6362, "step": 5078 }, { "epoch": 0.09284003875189646, "grad_norm": 7.2105555521385725, "learning_rate": 9.896863017236626e-06, "loss": 17.8813, "step": 5079 }, { "epoch": 0.092858317948343, "grad_norm": 8.091388948800052, "learning_rate": 9.896803195787315e-06, "loss": 18.272, "step": 5080 }, { "epoch": 0.09287659714478952, "grad_norm": 6.17918857902524, "learning_rate": 9.896743357175131e-06, "loss": 17.3417, "step": 5081 }, { "epoch": 0.09289487634123604, "grad_norm": 6.513701464355702, "learning_rate": 9.896683501400289e-06, "loss": 17.4773, "step": 5082 }, { "epoch": 0.09291315553768256, "grad_norm": 8.14203944656455, "learning_rate": 9.896623628462994e-06, "loss": 18.1933, "step": 5083 }, { "epoch": 0.09293143473412908, "grad_norm": 7.145422400847804, "learning_rate": 9.896563738363458e-06, "loss": 18.1071, "step": 5084 }, { "epoch": 0.0929497139305756, "grad_norm": 9.565680857666239, "learning_rate": 9.896503831101893e-06, "loss": 18.2842, "step": 5085 }, { "epoch": 0.09296799312702214, "grad_norm": 6.958968934837667, "learning_rate": 9.896443906678505e-06, "loss": 17.7952, "step": 5086 }, { "epoch": 0.09298627232346866, "grad_norm": 6.937333260918055, "learning_rate": 9.896383965093508e-06, "loss": 17.833, "step": 5087 }, { "epoch": 0.09300455151991519, "grad_norm": 6.517897336294009, "learning_rate": 9.896324006347109e-06, "loss": 17.704, "step": 5088 }, { "epoch": 0.0930228307163617, "grad_norm": 8.297352501735967, "learning_rate": 9.896264030439518e-06, "loss": 18.3063, "step": 5089 }, { "epoch": 0.09304110991280823, "grad_norm": 7.175602043362299, "learning_rate": 9.896204037370949e-06, "loss": 18.0502, "step": 5090 }, { "epoch": 0.09305938910925475, "grad_norm": 6.07682584332193, "learning_rate": 9.896144027141608e-06, "loss": 17.2171, "step": 5091 }, { "epoch": 0.09307766830570129, "grad_norm": 7.418730929559951, "learning_rate": 9.896083999751707e-06, "loss": 18.0613, "step": 5092 }, { "epoch": 0.09309594750214781, "grad_norm": 7.917331776771053, "learning_rate": 9.896023955201456e-06, "loss": 17.8008, "step": 5093 }, { "epoch": 0.09311422669859433, "grad_norm": 7.2096344389067895, "learning_rate": 9.895963893491068e-06, "loss": 17.872, "step": 5094 }, { "epoch": 0.09313250589504085, "grad_norm": 6.838032571383418, "learning_rate": 9.895903814620752e-06, "loss": 17.6463, "step": 5095 }, { "epoch": 0.09315078509148737, "grad_norm": 7.1996244387813135, "learning_rate": 9.895843718590715e-06, "loss": 17.8283, "step": 5096 }, { "epoch": 0.09316906428793391, "grad_norm": 6.547244638406464, "learning_rate": 9.89578360540117e-06, "loss": 17.31, "step": 5097 }, { "epoch": 0.09318734348438043, "grad_norm": 8.132690254906443, "learning_rate": 9.895723475052332e-06, "loss": 18.2499, "step": 5098 }, { "epoch": 0.09320562268082695, "grad_norm": 5.954262308126594, "learning_rate": 9.895663327544405e-06, "loss": 17.1972, "step": 5099 }, { "epoch": 0.09322390187727347, "grad_norm": 7.4598041309153995, "learning_rate": 9.895603162877606e-06, "loss": 17.6992, "step": 5100 }, { "epoch": 0.09324218107372, "grad_norm": 7.397237648454, "learning_rate": 9.89554298105214e-06, "loss": 17.9287, "step": 5101 }, { "epoch": 0.09326046027016652, "grad_norm": 6.987429839174725, "learning_rate": 9.89548278206822e-06, "loss": 17.5882, "step": 5102 }, { "epoch": 0.09327873946661305, "grad_norm": 8.65559946879881, "learning_rate": 9.89542256592606e-06, "loss": 18.4917, "step": 5103 }, { "epoch": 0.09329701866305957, "grad_norm": 7.200226542093818, "learning_rate": 9.895362332625867e-06, "loss": 17.8861, "step": 5104 }, { "epoch": 0.0933152978595061, "grad_norm": 6.4526778792290225, "learning_rate": 9.895302082167854e-06, "loss": 17.6469, "step": 5105 }, { "epoch": 0.09333357705595262, "grad_norm": 7.965510530399193, "learning_rate": 9.895241814552234e-06, "loss": 18.2435, "step": 5106 }, { "epoch": 0.09335185625239914, "grad_norm": 8.022839044254871, "learning_rate": 9.895181529779214e-06, "loss": 18.2214, "step": 5107 }, { "epoch": 0.09337013544884566, "grad_norm": 7.814000778915349, "learning_rate": 9.895121227849009e-06, "loss": 18.5997, "step": 5108 }, { "epoch": 0.0933884146452922, "grad_norm": 7.707830413959619, "learning_rate": 9.895060908761829e-06, "loss": 18.0669, "step": 5109 }, { "epoch": 0.09340669384173872, "grad_norm": 5.822862398431597, "learning_rate": 9.895000572517883e-06, "loss": 17.2046, "step": 5110 }, { "epoch": 0.09342497303818524, "grad_norm": 7.583345046065925, "learning_rate": 9.894940219117386e-06, "loss": 17.8696, "step": 5111 }, { "epoch": 0.09344325223463176, "grad_norm": 7.070215747881176, "learning_rate": 9.89487984856055e-06, "loss": 17.8606, "step": 5112 }, { "epoch": 0.09346153143107828, "grad_norm": 8.945290950233952, "learning_rate": 9.894819460847583e-06, "loss": 18.428, "step": 5113 }, { "epoch": 0.09347981062752482, "grad_norm": 6.672497587160126, "learning_rate": 9.894759055978698e-06, "loss": 17.673, "step": 5114 }, { "epoch": 0.09349808982397134, "grad_norm": 7.727826447480226, "learning_rate": 9.89469863395411e-06, "loss": 17.9795, "step": 5115 }, { "epoch": 0.09351636902041786, "grad_norm": 7.988666697174414, "learning_rate": 9.894638194774026e-06, "loss": 17.7235, "step": 5116 }, { "epoch": 0.09353464821686439, "grad_norm": 7.320396905888832, "learning_rate": 9.89457773843866e-06, "loss": 18.0135, "step": 5117 }, { "epoch": 0.09355292741331091, "grad_norm": 8.546963096960763, "learning_rate": 9.894517264948223e-06, "loss": 17.8598, "step": 5118 }, { "epoch": 0.09357120660975743, "grad_norm": 9.650559515995681, "learning_rate": 9.89445677430293e-06, "loss": 18.5405, "step": 5119 }, { "epoch": 0.09358948580620396, "grad_norm": 6.276272669282278, "learning_rate": 9.894396266502988e-06, "loss": 17.2553, "step": 5120 }, { "epoch": 0.09360776500265049, "grad_norm": 8.242730452685143, "learning_rate": 9.894335741548612e-06, "loss": 18.464, "step": 5121 }, { "epoch": 0.09362604419909701, "grad_norm": 7.62750328126906, "learning_rate": 9.894275199440017e-06, "loss": 17.6168, "step": 5122 }, { "epoch": 0.09364432339554353, "grad_norm": 8.633977352790344, "learning_rate": 9.894214640177411e-06, "loss": 18.4184, "step": 5123 }, { "epoch": 0.09366260259199005, "grad_norm": 6.397828277696224, "learning_rate": 9.894154063761005e-06, "loss": 17.3383, "step": 5124 }, { "epoch": 0.09368088178843657, "grad_norm": 7.15244964329956, "learning_rate": 9.894093470191016e-06, "loss": 17.741, "step": 5125 }, { "epoch": 0.09369916098488311, "grad_norm": 7.8472696501581325, "learning_rate": 9.894032859467653e-06, "loss": 17.6663, "step": 5126 }, { "epoch": 0.09371744018132963, "grad_norm": 5.85358871564374, "learning_rate": 9.893972231591131e-06, "loss": 17.1793, "step": 5127 }, { "epoch": 0.09373571937777615, "grad_norm": 6.8866450513713495, "learning_rate": 9.89391158656166e-06, "loss": 17.7025, "step": 5128 }, { "epoch": 0.09375399857422267, "grad_norm": 8.32418053354413, "learning_rate": 9.893850924379453e-06, "loss": 17.9356, "step": 5129 }, { "epoch": 0.0937722777706692, "grad_norm": 6.706146911778752, "learning_rate": 9.893790245044723e-06, "loss": 17.4315, "step": 5130 }, { "epoch": 0.09379055696711573, "grad_norm": 9.072350200702102, "learning_rate": 9.893729548557684e-06, "loss": 18.5131, "step": 5131 }, { "epoch": 0.09380883616356225, "grad_norm": 7.738821989454404, "learning_rate": 9.893668834918547e-06, "loss": 17.5097, "step": 5132 }, { "epoch": 0.09382711536000878, "grad_norm": 6.246479875134676, "learning_rate": 9.893608104127525e-06, "loss": 17.5148, "step": 5133 }, { "epoch": 0.0938453945564553, "grad_norm": 7.412553148077846, "learning_rate": 9.893547356184832e-06, "loss": 17.6774, "step": 5134 }, { "epoch": 0.09386367375290182, "grad_norm": 6.0480645864262055, "learning_rate": 9.89348659109068e-06, "loss": 17.2246, "step": 5135 }, { "epoch": 0.09388195294934834, "grad_norm": 6.586074664871277, "learning_rate": 9.893425808845283e-06, "loss": 17.566, "step": 5136 }, { "epoch": 0.09390023214579488, "grad_norm": 7.415747677424086, "learning_rate": 9.893365009448853e-06, "loss": 17.7678, "step": 5137 }, { "epoch": 0.0939185113422414, "grad_norm": 6.840226891812664, "learning_rate": 9.893304192901601e-06, "loss": 17.4448, "step": 5138 }, { "epoch": 0.09393679053868792, "grad_norm": 7.7067826992775945, "learning_rate": 9.893243359203743e-06, "loss": 18.0775, "step": 5139 }, { "epoch": 0.09395506973513444, "grad_norm": 7.110277590355808, "learning_rate": 9.893182508355493e-06, "loss": 17.7244, "step": 5140 }, { "epoch": 0.09397334893158096, "grad_norm": 7.988985961336596, "learning_rate": 9.893121640357063e-06, "loss": 18.1297, "step": 5141 }, { "epoch": 0.09399162812802749, "grad_norm": 6.612055979101377, "learning_rate": 9.893060755208666e-06, "loss": 17.2624, "step": 5142 }, { "epoch": 0.09400990732447402, "grad_norm": 6.668732972463614, "learning_rate": 9.892999852910515e-06, "loss": 17.4488, "step": 5143 }, { "epoch": 0.09402818652092054, "grad_norm": 7.944290272730593, "learning_rate": 9.892938933462824e-06, "loss": 18.0692, "step": 5144 }, { "epoch": 0.09404646571736706, "grad_norm": 8.289932641940927, "learning_rate": 9.892877996865807e-06, "loss": 18.2843, "step": 5145 }, { "epoch": 0.09406474491381359, "grad_norm": 6.047733902583171, "learning_rate": 9.892817043119679e-06, "loss": 17.3479, "step": 5146 }, { "epoch": 0.09408302411026011, "grad_norm": 7.676574651211155, "learning_rate": 9.89275607222465e-06, "loss": 17.9895, "step": 5147 }, { "epoch": 0.09410130330670664, "grad_norm": 6.572160583036424, "learning_rate": 9.892695084180934e-06, "loss": 17.495, "step": 5148 }, { "epoch": 0.09411958250315317, "grad_norm": 7.760488971814796, "learning_rate": 9.892634078988748e-06, "loss": 17.8998, "step": 5149 }, { "epoch": 0.09413786169959969, "grad_norm": 6.831034321957358, "learning_rate": 9.892573056648305e-06, "loss": 17.8413, "step": 5150 }, { "epoch": 0.09415614089604621, "grad_norm": 6.667496624357254, "learning_rate": 9.892512017159817e-06, "loss": 17.3853, "step": 5151 }, { "epoch": 0.09417442009249273, "grad_norm": 7.427303461028795, "learning_rate": 9.892450960523499e-06, "loss": 17.6116, "step": 5152 }, { "epoch": 0.09419269928893925, "grad_norm": 7.9004683741857535, "learning_rate": 9.892389886739563e-06, "loss": 18.2583, "step": 5153 }, { "epoch": 0.09421097848538579, "grad_norm": 9.096128004383484, "learning_rate": 9.892328795808228e-06, "loss": 18.4646, "step": 5154 }, { "epoch": 0.09422925768183231, "grad_norm": 7.952199273037197, "learning_rate": 9.892267687729704e-06, "loss": 18.2197, "step": 5155 }, { "epoch": 0.09424753687827883, "grad_norm": 6.217855809239918, "learning_rate": 9.892206562504207e-06, "loss": 17.389, "step": 5156 }, { "epoch": 0.09426581607472535, "grad_norm": 6.659049781394738, "learning_rate": 9.89214542013195e-06, "loss": 17.3625, "step": 5157 }, { "epoch": 0.09428409527117187, "grad_norm": 7.4188287578494565, "learning_rate": 9.892084260613148e-06, "loss": 17.8858, "step": 5158 }, { "epoch": 0.0943023744676184, "grad_norm": 8.965030987127742, "learning_rate": 9.892023083948015e-06, "loss": 18.5783, "step": 5159 }, { "epoch": 0.09432065366406493, "grad_norm": 7.567770700328357, "learning_rate": 9.891961890136766e-06, "loss": 17.9085, "step": 5160 }, { "epoch": 0.09433893286051145, "grad_norm": 6.064575987108541, "learning_rate": 9.891900679179614e-06, "loss": 17.2038, "step": 5161 }, { "epoch": 0.09435721205695798, "grad_norm": 7.434547395908676, "learning_rate": 9.891839451076775e-06, "loss": 17.9291, "step": 5162 }, { "epoch": 0.0943754912534045, "grad_norm": 7.773537562494793, "learning_rate": 9.891778205828465e-06, "loss": 18.3101, "step": 5163 }, { "epoch": 0.09439377044985102, "grad_norm": 6.476420851524025, "learning_rate": 9.891716943434893e-06, "loss": 17.4162, "step": 5164 }, { "epoch": 0.09441204964629756, "grad_norm": 7.067572833939982, "learning_rate": 9.89165566389628e-06, "loss": 17.9595, "step": 5165 }, { "epoch": 0.09443032884274408, "grad_norm": 6.601796886973542, "learning_rate": 9.891594367212837e-06, "loss": 17.614, "step": 5166 }, { "epoch": 0.0944486080391906, "grad_norm": 8.40956824993968, "learning_rate": 9.891533053384784e-06, "loss": 18.378, "step": 5167 }, { "epoch": 0.09446688723563712, "grad_norm": 7.105079782615868, "learning_rate": 9.89147172241233e-06, "loss": 17.7088, "step": 5168 }, { "epoch": 0.09448516643208364, "grad_norm": 9.065696790013208, "learning_rate": 9.89141037429569e-06, "loss": 18.3997, "step": 5169 }, { "epoch": 0.09450344562853016, "grad_norm": 7.408161416479056, "learning_rate": 9.891349009035082e-06, "loss": 17.6111, "step": 5170 }, { "epoch": 0.0945217248249767, "grad_norm": 6.676298820376263, "learning_rate": 9.891287626630721e-06, "loss": 17.4411, "step": 5171 }, { "epoch": 0.09454000402142322, "grad_norm": 7.4254242397519254, "learning_rate": 9.891226227082822e-06, "loss": 17.8507, "step": 5172 }, { "epoch": 0.09455828321786974, "grad_norm": 6.595901024711817, "learning_rate": 9.891164810391599e-06, "loss": 17.7702, "step": 5173 }, { "epoch": 0.09457656241431626, "grad_norm": 6.2201256864739625, "learning_rate": 9.891103376557268e-06, "loss": 17.4838, "step": 5174 }, { "epoch": 0.09459484161076279, "grad_norm": 8.051874223536906, "learning_rate": 9.891041925580043e-06, "loss": 18.1326, "step": 5175 }, { "epoch": 0.09461312080720931, "grad_norm": 6.810582802101014, "learning_rate": 9.890980457460143e-06, "loss": 17.8296, "step": 5176 }, { "epoch": 0.09463140000365584, "grad_norm": 8.420908316405063, "learning_rate": 9.890918972197777e-06, "loss": 18.1608, "step": 5177 }, { "epoch": 0.09464967920010237, "grad_norm": 6.736216916777759, "learning_rate": 9.890857469793168e-06, "loss": 17.6606, "step": 5178 }, { "epoch": 0.09466795839654889, "grad_norm": 6.118770812982109, "learning_rate": 9.890795950246526e-06, "loss": 17.2436, "step": 5179 }, { "epoch": 0.09468623759299541, "grad_norm": 6.537623612244409, "learning_rate": 9.890734413558068e-06, "loss": 17.6136, "step": 5180 }, { "epoch": 0.09470451678944193, "grad_norm": 6.320666536498495, "learning_rate": 9.890672859728013e-06, "loss": 17.4728, "step": 5181 }, { "epoch": 0.09472279598588847, "grad_norm": 7.744391672160248, "learning_rate": 9.89061128875657e-06, "loss": 17.9127, "step": 5182 }, { "epoch": 0.09474107518233499, "grad_norm": 6.3098633894761065, "learning_rate": 9.890549700643962e-06, "loss": 17.379, "step": 5183 }, { "epoch": 0.09475935437878151, "grad_norm": 7.4749505601951345, "learning_rate": 9.890488095390401e-06, "loss": 18.1063, "step": 5184 }, { "epoch": 0.09477763357522803, "grad_norm": 7.196265894310334, "learning_rate": 9.890426472996104e-06, "loss": 18.1147, "step": 5185 }, { "epoch": 0.09479591277167455, "grad_norm": 7.06196872795695, "learning_rate": 9.890364833461285e-06, "loss": 17.3641, "step": 5186 }, { "epoch": 0.09481419196812108, "grad_norm": 8.512656954267568, "learning_rate": 9.890303176786163e-06, "loss": 18.4658, "step": 5187 }, { "epoch": 0.09483247116456761, "grad_norm": 7.498556706900513, "learning_rate": 9.890241502970952e-06, "loss": 17.4943, "step": 5188 }, { "epoch": 0.09485075036101413, "grad_norm": 6.214599113020599, "learning_rate": 9.89017981201587e-06, "loss": 17.2711, "step": 5189 }, { "epoch": 0.09486902955746065, "grad_norm": 6.733266265875151, "learning_rate": 9.890118103921132e-06, "loss": 17.4952, "step": 5190 }, { "epoch": 0.09488730875390718, "grad_norm": 6.724170248327515, "learning_rate": 9.890056378686952e-06, "loss": 17.1852, "step": 5191 }, { "epoch": 0.0949055879503537, "grad_norm": 7.085453016784695, "learning_rate": 9.889994636313551e-06, "loss": 17.8099, "step": 5192 }, { "epoch": 0.09492386714680022, "grad_norm": 8.154429760869085, "learning_rate": 9.889932876801143e-06, "loss": 18.3366, "step": 5193 }, { "epoch": 0.09494214634324676, "grad_norm": 6.980399329141078, "learning_rate": 9.889871100149944e-06, "loss": 17.8377, "step": 5194 }, { "epoch": 0.09496042553969328, "grad_norm": 8.254654376262852, "learning_rate": 9.889809306360171e-06, "loss": 17.9285, "step": 5195 }, { "epoch": 0.0949787047361398, "grad_norm": 7.801505937494357, "learning_rate": 9.88974749543204e-06, "loss": 17.6987, "step": 5196 }, { "epoch": 0.09499698393258632, "grad_norm": 7.095195996412267, "learning_rate": 9.88968566736577e-06, "loss": 17.8568, "step": 5197 }, { "epoch": 0.09501526312903284, "grad_norm": 7.474276996536663, "learning_rate": 9.889623822161575e-06, "loss": 18.023, "step": 5198 }, { "epoch": 0.09503354232547938, "grad_norm": 6.559234082417345, "learning_rate": 9.889561959819673e-06, "loss": 17.7538, "step": 5199 }, { "epoch": 0.0950518215219259, "grad_norm": 8.300625114491275, "learning_rate": 9.889500080340281e-06, "loss": 17.8038, "step": 5200 }, { "epoch": 0.09507010071837242, "grad_norm": 8.68399699423883, "learning_rate": 9.889438183723616e-06, "loss": 18.3276, "step": 5201 }, { "epoch": 0.09508837991481894, "grad_norm": 5.858054324353686, "learning_rate": 9.889376269969895e-06, "loss": 17.1554, "step": 5202 }, { "epoch": 0.09510665911126547, "grad_norm": 6.728176275885843, "learning_rate": 9.889314339079332e-06, "loss": 17.4803, "step": 5203 }, { "epoch": 0.09512493830771199, "grad_norm": 6.44781365917043, "learning_rate": 9.88925239105215e-06, "loss": 17.5822, "step": 5204 }, { "epoch": 0.09514321750415852, "grad_norm": 6.417523267951457, "learning_rate": 9.889190425888558e-06, "loss": 17.4344, "step": 5205 }, { "epoch": 0.09516149670060504, "grad_norm": 7.042317903426137, "learning_rate": 9.889128443588781e-06, "loss": 17.7418, "step": 5206 }, { "epoch": 0.09517977589705157, "grad_norm": 7.397451006051058, "learning_rate": 9.889066444153032e-06, "loss": 17.7729, "step": 5207 }, { "epoch": 0.09519805509349809, "grad_norm": 7.143273439551852, "learning_rate": 9.88900442758153e-06, "loss": 18.024, "step": 5208 }, { "epoch": 0.09521633428994461, "grad_norm": 8.615455102068974, "learning_rate": 9.88894239387449e-06, "loss": 18.2291, "step": 5209 }, { "epoch": 0.09523461348639113, "grad_norm": 7.529587476696245, "learning_rate": 9.888880343032133e-06, "loss": 17.7377, "step": 5210 }, { "epoch": 0.09525289268283767, "grad_norm": 6.63088867569911, "learning_rate": 9.888818275054675e-06, "loss": 17.6644, "step": 5211 }, { "epoch": 0.09527117187928419, "grad_norm": 7.519668617792681, "learning_rate": 9.888756189942333e-06, "loss": 18.1103, "step": 5212 }, { "epoch": 0.09528945107573071, "grad_norm": 6.972288231076231, "learning_rate": 9.888694087695323e-06, "loss": 17.536, "step": 5213 }, { "epoch": 0.09530773027217723, "grad_norm": 7.115601967477167, "learning_rate": 9.888631968313866e-06, "loss": 17.6653, "step": 5214 }, { "epoch": 0.09532600946862375, "grad_norm": 6.747204981196159, "learning_rate": 9.888569831798178e-06, "loss": 17.5997, "step": 5215 }, { "epoch": 0.09534428866507029, "grad_norm": 7.6468415772145555, "learning_rate": 9.888507678148475e-06, "loss": 18.1247, "step": 5216 }, { "epoch": 0.09536256786151681, "grad_norm": 7.070793850550537, "learning_rate": 9.88844550736498e-06, "loss": 17.4151, "step": 5217 }, { "epoch": 0.09538084705796333, "grad_norm": 6.474401361103499, "learning_rate": 9.888383319447905e-06, "loss": 17.2561, "step": 5218 }, { "epoch": 0.09539912625440986, "grad_norm": 7.58341485224294, "learning_rate": 9.888321114397472e-06, "loss": 18.0286, "step": 5219 }, { "epoch": 0.09541740545085638, "grad_norm": 6.972150944961971, "learning_rate": 9.888258892213898e-06, "loss": 17.8591, "step": 5220 }, { "epoch": 0.0954356846473029, "grad_norm": 6.744493413140763, "learning_rate": 9.8881966528974e-06, "loss": 17.5452, "step": 5221 }, { "epoch": 0.09545396384374943, "grad_norm": 7.873660672430703, "learning_rate": 9.888134396448198e-06, "loss": 17.9007, "step": 5222 }, { "epoch": 0.09547224304019596, "grad_norm": 7.298583415260131, "learning_rate": 9.888072122866508e-06, "loss": 17.6763, "step": 5223 }, { "epoch": 0.09549052223664248, "grad_norm": 6.003618951036251, "learning_rate": 9.888009832152549e-06, "loss": 17.2141, "step": 5224 }, { "epoch": 0.095508801433089, "grad_norm": 6.2177497010827105, "learning_rate": 9.887947524306541e-06, "loss": 17.4351, "step": 5225 }, { "epoch": 0.09552708062953552, "grad_norm": 6.666056134453053, "learning_rate": 9.8878851993287e-06, "loss": 17.6842, "step": 5226 }, { "epoch": 0.09554535982598204, "grad_norm": 6.516298719067309, "learning_rate": 9.887822857219248e-06, "loss": 17.4614, "step": 5227 }, { "epoch": 0.09556363902242858, "grad_norm": 7.024816101465736, "learning_rate": 9.887760497978397e-06, "loss": 17.7201, "step": 5228 }, { "epoch": 0.0955819182188751, "grad_norm": 7.200756136312985, "learning_rate": 9.887698121606374e-06, "loss": 17.7464, "step": 5229 }, { "epoch": 0.09560019741532162, "grad_norm": 7.3783861132193564, "learning_rate": 9.887635728103388e-06, "loss": 17.7704, "step": 5230 }, { "epoch": 0.09561847661176814, "grad_norm": 7.2206811986317945, "learning_rate": 9.887573317469668e-06, "loss": 17.6605, "step": 5231 }, { "epoch": 0.09563675580821467, "grad_norm": 8.70079828108925, "learning_rate": 9.887510889705426e-06, "loss": 18.4645, "step": 5232 }, { "epoch": 0.0956550350046612, "grad_norm": 7.636005799765782, "learning_rate": 9.887448444810881e-06, "loss": 17.893, "step": 5233 }, { "epoch": 0.09567331420110772, "grad_norm": 6.993684305512817, "learning_rate": 9.887385982786252e-06, "loss": 17.7158, "step": 5234 }, { "epoch": 0.09569159339755424, "grad_norm": 6.608801131132104, "learning_rate": 9.887323503631762e-06, "loss": 17.3909, "step": 5235 }, { "epoch": 0.09570987259400077, "grad_norm": 6.771682563084261, "learning_rate": 9.887261007347627e-06, "loss": 17.3143, "step": 5236 }, { "epoch": 0.09572815179044729, "grad_norm": 7.950569785671081, "learning_rate": 9.887198493934067e-06, "loss": 18.1489, "step": 5237 }, { "epoch": 0.09574643098689381, "grad_norm": 7.3982490836298656, "learning_rate": 9.8871359633913e-06, "loss": 17.8159, "step": 5238 }, { "epoch": 0.09576471018334035, "grad_norm": 6.990996331506278, "learning_rate": 9.887073415719544e-06, "loss": 17.5424, "step": 5239 }, { "epoch": 0.09578298937978687, "grad_norm": 7.710399343547748, "learning_rate": 9.88701085091902e-06, "loss": 17.9353, "step": 5240 }, { "epoch": 0.09580126857623339, "grad_norm": 8.072937592461615, "learning_rate": 9.886948268989947e-06, "loss": 18.0326, "step": 5241 }, { "epoch": 0.09581954777267991, "grad_norm": 7.40238496151935, "learning_rate": 9.886885669932545e-06, "loss": 17.9291, "step": 5242 }, { "epoch": 0.09583782696912643, "grad_norm": 7.78551859722493, "learning_rate": 9.886823053747034e-06, "loss": 18.1532, "step": 5243 }, { "epoch": 0.09585610616557295, "grad_norm": 7.8382493474777, "learning_rate": 9.88676042043363e-06, "loss": 18.0345, "step": 5244 }, { "epoch": 0.09587438536201949, "grad_norm": 7.25646190192345, "learning_rate": 9.886697769992556e-06, "loss": 17.8306, "step": 5245 }, { "epoch": 0.09589266455846601, "grad_norm": 10.804791626584706, "learning_rate": 9.88663510242403e-06, "loss": 18.711, "step": 5246 }, { "epoch": 0.09591094375491253, "grad_norm": 6.201093743279867, "learning_rate": 9.886572417728274e-06, "loss": 17.3102, "step": 5247 }, { "epoch": 0.09592922295135906, "grad_norm": 8.3642670823898, "learning_rate": 9.886509715905502e-06, "loss": 17.9239, "step": 5248 }, { "epoch": 0.09594750214780558, "grad_norm": 7.276983929820145, "learning_rate": 9.886446996955939e-06, "loss": 17.8457, "step": 5249 }, { "epoch": 0.09596578134425211, "grad_norm": 6.708835063338171, "learning_rate": 9.886384260879804e-06, "loss": 17.6739, "step": 5250 }, { "epoch": 0.09598406054069863, "grad_norm": 7.093126797744419, "learning_rate": 9.886321507677316e-06, "loss": 17.8085, "step": 5251 }, { "epoch": 0.09600233973714516, "grad_norm": 6.497532496574756, "learning_rate": 9.886258737348695e-06, "loss": 17.4002, "step": 5252 }, { "epoch": 0.09602061893359168, "grad_norm": 6.969377082513094, "learning_rate": 9.886195949894162e-06, "loss": 17.582, "step": 5253 }, { "epoch": 0.0960388981300382, "grad_norm": 5.624883262985429, "learning_rate": 9.886133145313934e-06, "loss": 17.3625, "step": 5254 }, { "epoch": 0.09605717732648472, "grad_norm": 7.3925160558442915, "learning_rate": 9.886070323608236e-06, "loss": 17.7853, "step": 5255 }, { "epoch": 0.09607545652293126, "grad_norm": 6.872808240356434, "learning_rate": 9.886007484777284e-06, "loss": 17.6445, "step": 5256 }, { "epoch": 0.09609373571937778, "grad_norm": 6.041244016989075, "learning_rate": 9.8859446288213e-06, "loss": 17.5063, "step": 5257 }, { "epoch": 0.0961120149158243, "grad_norm": 6.506616084585259, "learning_rate": 9.885881755740503e-06, "loss": 17.3917, "step": 5258 }, { "epoch": 0.09613029411227082, "grad_norm": 5.72404838769049, "learning_rate": 9.885818865535115e-06, "loss": 17.0782, "step": 5259 }, { "epoch": 0.09614857330871734, "grad_norm": 5.600220206652367, "learning_rate": 9.885755958205357e-06, "loss": 17.2309, "step": 5260 }, { "epoch": 0.09616685250516387, "grad_norm": 8.256578669744444, "learning_rate": 9.885693033751447e-06, "loss": 18.4663, "step": 5261 }, { "epoch": 0.0961851317016104, "grad_norm": 7.469744356520143, "learning_rate": 9.885630092173608e-06, "loss": 17.8741, "step": 5262 }, { "epoch": 0.09620341089805692, "grad_norm": 9.012604336564129, "learning_rate": 9.885567133472059e-06, "loss": 18.3334, "step": 5263 }, { "epoch": 0.09622169009450345, "grad_norm": 6.558039475714017, "learning_rate": 9.88550415764702e-06, "loss": 17.8242, "step": 5264 }, { "epoch": 0.09623996929094997, "grad_norm": 9.286626068016695, "learning_rate": 9.885441164698715e-06, "loss": 18.3687, "step": 5265 }, { "epoch": 0.09625824848739649, "grad_norm": 6.79502705833962, "learning_rate": 9.885378154627362e-06, "loss": 17.3884, "step": 5266 }, { "epoch": 0.09627652768384302, "grad_norm": 7.629505796668096, "learning_rate": 9.885315127433182e-06, "loss": 17.9658, "step": 5267 }, { "epoch": 0.09629480688028955, "grad_norm": 7.347670861701307, "learning_rate": 9.885252083116398e-06, "loss": 17.5675, "step": 5268 }, { "epoch": 0.09631308607673607, "grad_norm": 7.17750685681595, "learning_rate": 9.885189021677226e-06, "loss": 17.7342, "step": 5269 }, { "epoch": 0.09633136527318259, "grad_norm": 7.1640515597335, "learning_rate": 9.885125943115892e-06, "loss": 17.7269, "step": 5270 }, { "epoch": 0.09634964446962911, "grad_norm": 7.506204128755075, "learning_rate": 9.885062847432615e-06, "loss": 17.8263, "step": 5271 }, { "epoch": 0.09636792366607563, "grad_norm": 8.211680854441479, "learning_rate": 9.884999734627618e-06, "loss": 18.2696, "step": 5272 }, { "epoch": 0.09638620286252217, "grad_norm": 7.649439093091455, "learning_rate": 9.884936604701122e-06, "loss": 17.6967, "step": 5273 }, { "epoch": 0.09640448205896869, "grad_norm": 7.083866308988564, "learning_rate": 9.884873457653345e-06, "loss": 17.6615, "step": 5274 }, { "epoch": 0.09642276125541521, "grad_norm": 8.367405155710328, "learning_rate": 9.88481029348451e-06, "loss": 18.1005, "step": 5275 }, { "epoch": 0.09644104045186173, "grad_norm": 7.022332031462355, "learning_rate": 9.884747112194839e-06, "loss": 17.7434, "step": 5276 }, { "epoch": 0.09645931964830826, "grad_norm": 7.394241098382519, "learning_rate": 9.884683913784553e-06, "loss": 17.8212, "step": 5277 }, { "epoch": 0.09647759884475478, "grad_norm": 7.119784720405873, "learning_rate": 9.884620698253877e-06, "loss": 17.0399, "step": 5278 }, { "epoch": 0.09649587804120131, "grad_norm": 7.982731933158025, "learning_rate": 9.884557465603026e-06, "loss": 17.7377, "step": 5279 }, { "epoch": 0.09651415723764784, "grad_norm": 7.178376538639677, "learning_rate": 9.884494215832225e-06, "loss": 17.8772, "step": 5280 }, { "epoch": 0.09653243643409436, "grad_norm": 7.123912373964505, "learning_rate": 9.884430948941697e-06, "loss": 18.037, "step": 5281 }, { "epoch": 0.09655071563054088, "grad_norm": 7.554159375338772, "learning_rate": 9.884367664931663e-06, "loss": 17.9028, "step": 5282 }, { "epoch": 0.0965689948269874, "grad_norm": 6.008364724596991, "learning_rate": 9.884304363802342e-06, "loss": 17.0345, "step": 5283 }, { "epoch": 0.09658727402343394, "grad_norm": 5.624135781155062, "learning_rate": 9.88424104555396e-06, "loss": 16.9863, "step": 5284 }, { "epoch": 0.09660555321988046, "grad_norm": 6.8371078213504255, "learning_rate": 9.884177710186736e-06, "loss": 17.6411, "step": 5285 }, { "epoch": 0.09662383241632698, "grad_norm": 6.528436990810273, "learning_rate": 9.884114357700893e-06, "loss": 17.2872, "step": 5286 }, { "epoch": 0.0966421116127735, "grad_norm": 7.469541449794446, "learning_rate": 9.884050988096651e-06, "loss": 17.8118, "step": 5287 }, { "epoch": 0.09666039080922002, "grad_norm": 10.212809506208576, "learning_rate": 9.883987601374238e-06, "loss": 18.2187, "step": 5288 }, { "epoch": 0.09667867000566654, "grad_norm": 7.366905909371753, "learning_rate": 9.88392419753387e-06, "loss": 18.2504, "step": 5289 }, { "epoch": 0.09669694920211308, "grad_norm": 7.3333108584614495, "learning_rate": 9.883860776575772e-06, "loss": 17.779, "step": 5290 }, { "epoch": 0.0967152283985596, "grad_norm": 7.976466138224247, "learning_rate": 9.883797338500165e-06, "loss": 18.1815, "step": 5291 }, { "epoch": 0.09673350759500612, "grad_norm": 7.365136220235471, "learning_rate": 9.883733883307272e-06, "loss": 17.7525, "step": 5292 }, { "epoch": 0.09675178679145265, "grad_norm": 8.039554757285382, "learning_rate": 9.883670410997318e-06, "loss": 18.4228, "step": 5293 }, { "epoch": 0.09677006598789917, "grad_norm": 7.53287053112969, "learning_rate": 9.883606921570521e-06, "loss": 18.0332, "step": 5294 }, { "epoch": 0.09678834518434569, "grad_norm": 7.081858402661466, "learning_rate": 9.883543415027107e-06, "loss": 17.6161, "step": 5295 }, { "epoch": 0.09680662438079222, "grad_norm": 6.785646720458371, "learning_rate": 9.883479891367294e-06, "loss": 17.467, "step": 5296 }, { "epoch": 0.09682490357723875, "grad_norm": 7.477258930459312, "learning_rate": 9.88341635059131e-06, "loss": 18.217, "step": 5297 }, { "epoch": 0.09684318277368527, "grad_norm": 6.869902920200466, "learning_rate": 9.883352792699375e-06, "loss": 17.8621, "step": 5298 }, { "epoch": 0.09686146197013179, "grad_norm": 7.789336928668197, "learning_rate": 9.883289217691712e-06, "loss": 18.2331, "step": 5299 }, { "epoch": 0.09687974116657831, "grad_norm": 8.526389110172985, "learning_rate": 9.883225625568544e-06, "loss": 18.3998, "step": 5300 }, { "epoch": 0.09689802036302485, "grad_norm": 6.130281431362759, "learning_rate": 9.883162016330094e-06, "loss": 17.2481, "step": 5301 }, { "epoch": 0.09691629955947137, "grad_norm": 6.948104012679294, "learning_rate": 9.883098389976586e-06, "loss": 17.8148, "step": 5302 }, { "epoch": 0.09693457875591789, "grad_norm": 6.542422082061749, "learning_rate": 9.88303474650824e-06, "loss": 17.3871, "step": 5303 }, { "epoch": 0.09695285795236441, "grad_norm": 7.134880808521567, "learning_rate": 9.882971085925283e-06, "loss": 17.8606, "step": 5304 }, { "epoch": 0.09697113714881093, "grad_norm": 8.605364075819775, "learning_rate": 9.882907408227934e-06, "loss": 18.4823, "step": 5305 }, { "epoch": 0.09698941634525746, "grad_norm": 7.582757133706279, "learning_rate": 9.882843713416421e-06, "loss": 17.8743, "step": 5306 }, { "epoch": 0.09700769554170399, "grad_norm": 6.79791230027872, "learning_rate": 9.882780001490963e-06, "loss": 17.4309, "step": 5307 }, { "epoch": 0.09702597473815051, "grad_norm": 7.506229977541841, "learning_rate": 9.882716272451785e-06, "loss": 17.7997, "step": 5308 }, { "epoch": 0.09704425393459704, "grad_norm": 6.909299202803373, "learning_rate": 9.882652526299109e-06, "loss": 17.583, "step": 5309 }, { "epoch": 0.09706253313104356, "grad_norm": 8.08860684609603, "learning_rate": 9.88258876303316e-06, "loss": 17.9465, "step": 5310 }, { "epoch": 0.09708081232749008, "grad_norm": 6.7712705978128405, "learning_rate": 9.882524982654162e-06, "loss": 17.6611, "step": 5311 }, { "epoch": 0.0970990915239366, "grad_norm": 7.700096808984828, "learning_rate": 9.882461185162338e-06, "loss": 18.1099, "step": 5312 }, { "epoch": 0.09711737072038314, "grad_norm": 7.1087451538688, "learning_rate": 9.88239737055791e-06, "loss": 18.051, "step": 5313 }, { "epoch": 0.09713564991682966, "grad_norm": 6.897441875558762, "learning_rate": 9.882333538841103e-06, "loss": 17.5645, "step": 5314 }, { "epoch": 0.09715392911327618, "grad_norm": 6.684874766663511, "learning_rate": 9.88226969001214e-06, "loss": 17.5739, "step": 5315 }, { "epoch": 0.0971722083097227, "grad_norm": 6.078619167484666, "learning_rate": 9.882205824071246e-06, "loss": 17.1586, "step": 5316 }, { "epoch": 0.09719048750616922, "grad_norm": 6.92733611336219, "learning_rate": 9.882141941018644e-06, "loss": 17.6455, "step": 5317 }, { "epoch": 0.09720876670261576, "grad_norm": 7.5784526259969995, "learning_rate": 9.882078040854559e-06, "loss": 17.7522, "step": 5318 }, { "epoch": 0.09722704589906228, "grad_norm": 7.36961789501853, "learning_rate": 9.882014123579215e-06, "loss": 17.5854, "step": 5319 }, { "epoch": 0.0972453250955088, "grad_norm": 7.317098772859468, "learning_rate": 9.881950189192833e-06, "loss": 17.8084, "step": 5320 }, { "epoch": 0.09726360429195532, "grad_norm": 5.787194122942332, "learning_rate": 9.88188623769564e-06, "loss": 17.2275, "step": 5321 }, { "epoch": 0.09728188348840185, "grad_norm": 7.168416748681587, "learning_rate": 9.88182226908786e-06, "loss": 17.7341, "step": 5322 }, { "epoch": 0.09730016268484837, "grad_norm": 7.407609911779401, "learning_rate": 9.881758283369715e-06, "loss": 17.9142, "step": 5323 }, { "epoch": 0.0973184418812949, "grad_norm": 6.927321379262219, "learning_rate": 9.881694280541431e-06, "loss": 17.6755, "step": 5324 }, { "epoch": 0.09733672107774143, "grad_norm": 7.267632829932315, "learning_rate": 9.881630260603234e-06, "loss": 17.7475, "step": 5325 }, { "epoch": 0.09735500027418795, "grad_norm": 7.04217802331472, "learning_rate": 9.881566223555345e-06, "loss": 17.3577, "step": 5326 }, { "epoch": 0.09737327947063447, "grad_norm": 6.746752510385598, "learning_rate": 9.881502169397991e-06, "loss": 17.5972, "step": 5327 }, { "epoch": 0.09739155866708099, "grad_norm": 5.2912247121295435, "learning_rate": 9.881438098131395e-06, "loss": 17.1553, "step": 5328 }, { "epoch": 0.09740983786352751, "grad_norm": 7.325975113070174, "learning_rate": 9.881374009755784e-06, "loss": 18.1632, "step": 5329 }, { "epoch": 0.09742811705997405, "grad_norm": 6.080116422589618, "learning_rate": 9.881309904271378e-06, "loss": 17.3491, "step": 5330 }, { "epoch": 0.09744639625642057, "grad_norm": 6.3866380418005635, "learning_rate": 9.881245781678405e-06, "loss": 17.1599, "step": 5331 }, { "epoch": 0.09746467545286709, "grad_norm": 6.035010066941613, "learning_rate": 9.881181641977088e-06, "loss": 17.3709, "step": 5332 }, { "epoch": 0.09748295464931361, "grad_norm": 7.703056986080128, "learning_rate": 9.881117485167656e-06, "loss": 17.9805, "step": 5333 }, { "epoch": 0.09750123384576014, "grad_norm": 8.709080770062386, "learning_rate": 9.881053311250328e-06, "loss": 18.6102, "step": 5334 }, { "epoch": 0.09751951304220667, "grad_norm": 6.192408709014584, "learning_rate": 9.880989120225333e-06, "loss": 17.413, "step": 5335 }, { "epoch": 0.09753779223865319, "grad_norm": 8.105822771494752, "learning_rate": 9.880924912092894e-06, "loss": 18.212, "step": 5336 }, { "epoch": 0.09755607143509971, "grad_norm": 6.7656047760564775, "learning_rate": 9.880860686853238e-06, "loss": 17.779, "step": 5337 }, { "epoch": 0.09757435063154624, "grad_norm": 6.374458346957026, "learning_rate": 9.880796444506588e-06, "loss": 17.3893, "step": 5338 }, { "epoch": 0.09759262982799276, "grad_norm": 6.271245313507004, "learning_rate": 9.88073218505317e-06, "loss": 17.1529, "step": 5339 }, { "epoch": 0.09761090902443928, "grad_norm": 7.322123584958095, "learning_rate": 9.880667908493209e-06, "loss": 17.7049, "step": 5340 }, { "epoch": 0.09762918822088582, "grad_norm": 7.979414944996543, "learning_rate": 9.88060361482693e-06, "loss": 18.3133, "step": 5341 }, { "epoch": 0.09764746741733234, "grad_norm": 8.61766632091909, "learning_rate": 9.88053930405456e-06, "loss": 18.5276, "step": 5342 }, { "epoch": 0.09766574661377886, "grad_norm": 9.690909411811695, "learning_rate": 9.880474976176322e-06, "loss": 19.0181, "step": 5343 }, { "epoch": 0.09768402581022538, "grad_norm": 7.445909909009077, "learning_rate": 9.880410631192444e-06, "loss": 17.7228, "step": 5344 }, { "epoch": 0.0977023050066719, "grad_norm": 6.5210768427174095, "learning_rate": 9.88034626910315e-06, "loss": 17.4109, "step": 5345 }, { "epoch": 0.09772058420311842, "grad_norm": 7.888989937175442, "learning_rate": 9.880281889908665e-06, "loss": 18.2857, "step": 5346 }, { "epoch": 0.09773886339956496, "grad_norm": 7.253556566254523, "learning_rate": 9.880217493609216e-06, "loss": 17.6233, "step": 5347 }, { "epoch": 0.09775714259601148, "grad_norm": 7.493275082127145, "learning_rate": 9.880153080205028e-06, "loss": 17.9103, "step": 5348 }, { "epoch": 0.097775421792458, "grad_norm": 6.687304133962577, "learning_rate": 9.880088649696327e-06, "loss": 17.7951, "step": 5349 }, { "epoch": 0.09779370098890453, "grad_norm": 6.6487009943221835, "learning_rate": 9.88002420208334e-06, "loss": 17.6598, "step": 5350 }, { "epoch": 0.09781198018535105, "grad_norm": 6.052180943355565, "learning_rate": 9.879959737366289e-06, "loss": 16.9574, "step": 5351 }, { "epoch": 0.09783025938179758, "grad_norm": 7.663162660828646, "learning_rate": 9.879895255545405e-06, "loss": 18.334, "step": 5352 }, { "epoch": 0.0978485385782441, "grad_norm": 7.163208212979588, "learning_rate": 9.87983075662091e-06, "loss": 17.9333, "step": 5353 }, { "epoch": 0.09786681777469063, "grad_norm": 7.711414455564948, "learning_rate": 9.879766240593033e-06, "loss": 17.826, "step": 5354 }, { "epoch": 0.09788509697113715, "grad_norm": 9.910252461266577, "learning_rate": 9.879701707461998e-06, "loss": 18.5209, "step": 5355 }, { "epoch": 0.09790337616758367, "grad_norm": 7.544857749664136, "learning_rate": 9.879637157228032e-06, "loss": 17.5333, "step": 5356 }, { "epoch": 0.09792165536403019, "grad_norm": 7.099394526580665, "learning_rate": 9.87957258989136e-06, "loss": 17.7289, "step": 5357 }, { "epoch": 0.09793993456047673, "grad_norm": 8.002312473025757, "learning_rate": 9.879508005452212e-06, "loss": 18.2373, "step": 5358 }, { "epoch": 0.09795821375692325, "grad_norm": 6.8712882139932185, "learning_rate": 9.879443403910812e-06, "loss": 17.4482, "step": 5359 }, { "epoch": 0.09797649295336977, "grad_norm": 6.542254007103261, "learning_rate": 9.879378785267383e-06, "loss": 17.6043, "step": 5360 }, { "epoch": 0.09799477214981629, "grad_norm": 8.079079767229963, "learning_rate": 9.879314149522156e-06, "loss": 18.41, "step": 5361 }, { "epoch": 0.09801305134626281, "grad_norm": 7.230662786934817, "learning_rate": 9.879249496675359e-06, "loss": 17.8467, "step": 5362 }, { "epoch": 0.09803133054270934, "grad_norm": 8.237757438238472, "learning_rate": 9.879184826727213e-06, "loss": 18.0755, "step": 5363 }, { "epoch": 0.09804960973915587, "grad_norm": 6.64001757792681, "learning_rate": 9.879120139677949e-06, "loss": 17.7599, "step": 5364 }, { "epoch": 0.0980678889356024, "grad_norm": 9.00030856218242, "learning_rate": 9.87905543552779e-06, "loss": 18.5465, "step": 5365 }, { "epoch": 0.09808616813204891, "grad_norm": 5.9319514025512206, "learning_rate": 9.878990714276968e-06, "loss": 17.1628, "step": 5366 }, { "epoch": 0.09810444732849544, "grad_norm": 6.982178255073946, "learning_rate": 9.878925975925707e-06, "loss": 17.8482, "step": 5367 }, { "epoch": 0.09812272652494196, "grad_norm": 7.016553498668367, "learning_rate": 9.878861220474232e-06, "loss": 17.7667, "step": 5368 }, { "epoch": 0.0981410057213885, "grad_norm": 7.201736280021446, "learning_rate": 9.878796447922771e-06, "loss": 17.5916, "step": 5369 }, { "epoch": 0.09815928491783502, "grad_norm": 7.532481678533857, "learning_rate": 9.878731658271554e-06, "loss": 17.8754, "step": 5370 }, { "epoch": 0.09817756411428154, "grad_norm": 6.311230681650899, "learning_rate": 9.878666851520805e-06, "loss": 17.4986, "step": 5371 }, { "epoch": 0.09819584331072806, "grad_norm": 6.475750631197108, "learning_rate": 9.878602027670751e-06, "loss": 17.5613, "step": 5372 }, { "epoch": 0.09821412250717458, "grad_norm": 7.224910645473922, "learning_rate": 9.878537186721623e-06, "loss": 17.752, "step": 5373 }, { "epoch": 0.0982324017036211, "grad_norm": 8.045719608506186, "learning_rate": 9.878472328673643e-06, "loss": 18.1327, "step": 5374 }, { "epoch": 0.09825068090006764, "grad_norm": 7.44718921926065, "learning_rate": 9.878407453527042e-06, "loss": 18.0817, "step": 5375 }, { "epoch": 0.09826896009651416, "grad_norm": 8.031113172574356, "learning_rate": 9.878342561282046e-06, "loss": 17.434, "step": 5376 }, { "epoch": 0.09828723929296068, "grad_norm": 7.582931894439497, "learning_rate": 9.878277651938883e-06, "loss": 17.8379, "step": 5377 }, { "epoch": 0.0983055184894072, "grad_norm": 6.2691037363538324, "learning_rate": 9.87821272549778e-06, "loss": 17.4363, "step": 5378 }, { "epoch": 0.09832379768585373, "grad_norm": 7.910581747723305, "learning_rate": 9.878147781958965e-06, "loss": 18.1192, "step": 5379 }, { "epoch": 0.09834207688230025, "grad_norm": 7.379569009044979, "learning_rate": 9.878082821322663e-06, "loss": 18.0581, "step": 5380 }, { "epoch": 0.09836035607874678, "grad_norm": 6.527518056624541, "learning_rate": 9.878017843589107e-06, "loss": 17.4445, "step": 5381 }, { "epoch": 0.0983786352751933, "grad_norm": 6.78755253095713, "learning_rate": 9.877952848758519e-06, "loss": 17.6175, "step": 5382 }, { "epoch": 0.09839691447163983, "grad_norm": 6.977920251600107, "learning_rate": 9.877887836831132e-06, "loss": 17.6141, "step": 5383 }, { "epoch": 0.09841519366808635, "grad_norm": 8.290696312597492, "learning_rate": 9.87782280780717e-06, "loss": 18.2516, "step": 5384 }, { "epoch": 0.09843347286453287, "grad_norm": 6.44443039994811, "learning_rate": 9.877757761686864e-06, "loss": 17.2411, "step": 5385 }, { "epoch": 0.0984517520609794, "grad_norm": 8.039395493731158, "learning_rate": 9.877692698470438e-06, "loss": 18.1254, "step": 5386 }, { "epoch": 0.09847003125742593, "grad_norm": 6.174780650552495, "learning_rate": 9.877627618158123e-06, "loss": 17.3012, "step": 5387 }, { "epoch": 0.09848831045387245, "grad_norm": 8.564452027284869, "learning_rate": 9.877562520750148e-06, "loss": 18.3757, "step": 5388 }, { "epoch": 0.09850658965031897, "grad_norm": 12.723700125736002, "learning_rate": 9.877497406246739e-06, "loss": 18.9342, "step": 5389 }, { "epoch": 0.09852486884676549, "grad_norm": 6.570689834194891, "learning_rate": 9.877432274648125e-06, "loss": 17.6527, "step": 5390 }, { "epoch": 0.09854314804321201, "grad_norm": 7.212075969847427, "learning_rate": 9.877367125954532e-06, "loss": 17.6892, "step": 5391 }, { "epoch": 0.09856142723965855, "grad_norm": 7.162008135860617, "learning_rate": 9.877301960166192e-06, "loss": 17.8613, "step": 5392 }, { "epoch": 0.09857970643610507, "grad_norm": 7.799232092978829, "learning_rate": 9.877236777283332e-06, "loss": 17.9345, "step": 5393 }, { "epoch": 0.0985979856325516, "grad_norm": 6.31091864296637, "learning_rate": 9.877171577306181e-06, "loss": 17.4031, "step": 5394 }, { "epoch": 0.09861626482899812, "grad_norm": 6.791065851383708, "learning_rate": 9.877106360234964e-06, "loss": 17.6314, "step": 5395 }, { "epoch": 0.09863454402544464, "grad_norm": 7.314248203014236, "learning_rate": 9.877041126069917e-06, "loss": 17.7338, "step": 5396 }, { "epoch": 0.09865282322189116, "grad_norm": 8.803413494157793, "learning_rate": 9.876975874811261e-06, "loss": 18.8308, "step": 5397 }, { "epoch": 0.0986711024183377, "grad_norm": 11.129809467457516, "learning_rate": 9.876910606459228e-06, "loss": 18.3517, "step": 5398 }, { "epoch": 0.09868938161478422, "grad_norm": 6.960028940931708, "learning_rate": 9.876845321014047e-06, "loss": 17.7416, "step": 5399 }, { "epoch": 0.09870766081123074, "grad_norm": 7.766890205745459, "learning_rate": 9.876780018475947e-06, "loss": 18.0501, "step": 5400 }, { "epoch": 0.09872594000767726, "grad_norm": 6.609650701226153, "learning_rate": 9.876714698845153e-06, "loss": 17.2986, "step": 5401 }, { "epoch": 0.09874421920412378, "grad_norm": 7.285538815241017, "learning_rate": 9.876649362121901e-06, "loss": 17.8622, "step": 5402 }, { "epoch": 0.09876249840057032, "grad_norm": 7.016219968300518, "learning_rate": 9.876584008306414e-06, "loss": 17.8093, "step": 5403 }, { "epoch": 0.09878077759701684, "grad_norm": 7.575328071727429, "learning_rate": 9.876518637398924e-06, "loss": 17.7971, "step": 5404 }, { "epoch": 0.09879905679346336, "grad_norm": 5.507656527965496, "learning_rate": 9.87645324939966e-06, "loss": 17.1415, "step": 5405 }, { "epoch": 0.09881733598990988, "grad_norm": 7.515076034232811, "learning_rate": 9.87638784430885e-06, "loss": 17.5956, "step": 5406 }, { "epoch": 0.0988356151863564, "grad_norm": 6.698772758642591, "learning_rate": 9.876322422126722e-06, "loss": 17.5607, "step": 5407 }, { "epoch": 0.09885389438280293, "grad_norm": 7.084818200127224, "learning_rate": 9.87625698285351e-06, "loss": 17.8535, "step": 5408 }, { "epoch": 0.09887217357924946, "grad_norm": 7.361523689917995, "learning_rate": 9.876191526489438e-06, "loss": 17.7445, "step": 5409 }, { "epoch": 0.09889045277569598, "grad_norm": 7.5897634403978955, "learning_rate": 9.87612605303474e-06, "loss": 17.8592, "step": 5410 }, { "epoch": 0.0989087319721425, "grad_norm": 8.596622229806462, "learning_rate": 9.876060562489643e-06, "loss": 18.291, "step": 5411 }, { "epoch": 0.09892701116858903, "grad_norm": 7.167603474624208, "learning_rate": 9.875995054854375e-06, "loss": 17.9727, "step": 5412 }, { "epoch": 0.09894529036503555, "grad_norm": 7.666953240832883, "learning_rate": 9.875929530129167e-06, "loss": 18.0282, "step": 5413 }, { "epoch": 0.09896356956148207, "grad_norm": 7.604734768374753, "learning_rate": 9.875863988314252e-06, "loss": 18.2639, "step": 5414 }, { "epoch": 0.0989818487579286, "grad_norm": 7.861239661676397, "learning_rate": 9.875798429409855e-06, "loss": 18.2204, "step": 5415 }, { "epoch": 0.09900012795437513, "grad_norm": 7.2050207094450895, "learning_rate": 9.875732853416208e-06, "loss": 17.7534, "step": 5416 }, { "epoch": 0.09901840715082165, "grad_norm": 8.512749916335364, "learning_rate": 9.87566726033354e-06, "loss": 17.7452, "step": 5417 }, { "epoch": 0.09903668634726817, "grad_norm": 7.1990424107187305, "learning_rate": 9.875601650162082e-06, "loss": 17.7461, "step": 5418 }, { "epoch": 0.0990549655437147, "grad_norm": 7.561257344374474, "learning_rate": 9.875536022902064e-06, "loss": 17.911, "step": 5419 }, { "epoch": 0.09907324474016123, "grad_norm": 7.608397345256539, "learning_rate": 9.875470378553714e-06, "loss": 17.753, "step": 5420 }, { "epoch": 0.09909152393660775, "grad_norm": 6.4574803895087705, "learning_rate": 9.875404717117263e-06, "loss": 17.4264, "step": 5421 }, { "epoch": 0.09910980313305427, "grad_norm": 6.691709365344509, "learning_rate": 9.875339038592944e-06, "loss": 17.6796, "step": 5422 }, { "epoch": 0.0991280823295008, "grad_norm": 7.47764722151518, "learning_rate": 9.875273342980982e-06, "loss": 18.1239, "step": 5423 }, { "epoch": 0.09914636152594732, "grad_norm": 6.205851196218604, "learning_rate": 9.875207630281611e-06, "loss": 17.4564, "step": 5424 }, { "epoch": 0.09916464072239384, "grad_norm": 7.055443921268711, "learning_rate": 9.87514190049506e-06, "loss": 17.8814, "step": 5425 }, { "epoch": 0.09918291991884037, "grad_norm": 6.347899534679206, "learning_rate": 9.87507615362156e-06, "loss": 17.3062, "step": 5426 }, { "epoch": 0.0992011991152869, "grad_norm": 6.219401244317228, "learning_rate": 9.875010389661341e-06, "loss": 17.2959, "step": 5427 }, { "epoch": 0.09921947831173342, "grad_norm": 5.659634986824411, "learning_rate": 9.874944608614634e-06, "loss": 17.1571, "step": 5428 }, { "epoch": 0.09923775750817994, "grad_norm": 9.489737878088745, "learning_rate": 9.874878810481669e-06, "loss": 18.5848, "step": 5429 }, { "epoch": 0.09925603670462646, "grad_norm": 6.934344610746548, "learning_rate": 9.874812995262676e-06, "loss": 17.7542, "step": 5430 }, { "epoch": 0.09927431590107298, "grad_norm": 7.698012600565545, "learning_rate": 9.87474716295789e-06, "loss": 18.2417, "step": 5431 }, { "epoch": 0.09929259509751952, "grad_norm": 8.826267470753006, "learning_rate": 9.874681313567533e-06, "loss": 18.6452, "step": 5432 }, { "epoch": 0.09931087429396604, "grad_norm": 8.153216940800014, "learning_rate": 9.874615447091845e-06, "loss": 17.8559, "step": 5433 }, { "epoch": 0.09932915349041256, "grad_norm": 5.80915749006908, "learning_rate": 9.874549563531051e-06, "loss": 17.4492, "step": 5434 }, { "epoch": 0.09934743268685908, "grad_norm": 7.003977421272474, "learning_rate": 9.874483662885383e-06, "loss": 17.6575, "step": 5435 }, { "epoch": 0.0993657118833056, "grad_norm": 7.256371239184099, "learning_rate": 9.874417745155075e-06, "loss": 17.9347, "step": 5436 }, { "epoch": 0.09938399107975214, "grad_norm": 6.660592230882827, "learning_rate": 9.874351810340355e-06, "loss": 17.7508, "step": 5437 }, { "epoch": 0.09940227027619866, "grad_norm": 6.208497065931254, "learning_rate": 9.874285858441455e-06, "loss": 17.3796, "step": 5438 }, { "epoch": 0.09942054947264518, "grad_norm": 7.348407501302437, "learning_rate": 9.874219889458605e-06, "loss": 17.5801, "step": 5439 }, { "epoch": 0.0994388286690917, "grad_norm": 8.023627408244378, "learning_rate": 9.874153903392037e-06, "loss": 18.1776, "step": 5440 }, { "epoch": 0.09945710786553823, "grad_norm": 7.112190887858089, "learning_rate": 9.874087900241984e-06, "loss": 17.5814, "step": 5441 }, { "epoch": 0.09947538706198475, "grad_norm": 7.154703241715606, "learning_rate": 9.874021880008675e-06, "loss": 17.4067, "step": 5442 }, { "epoch": 0.09949366625843128, "grad_norm": 6.544009814900835, "learning_rate": 9.873955842692341e-06, "loss": 17.3562, "step": 5443 }, { "epoch": 0.0995119454548778, "grad_norm": 8.454864865370041, "learning_rate": 9.873889788293217e-06, "loss": 17.7853, "step": 5444 }, { "epoch": 0.09953022465132433, "grad_norm": 7.003491466502003, "learning_rate": 9.873823716811533e-06, "loss": 17.6397, "step": 5445 }, { "epoch": 0.09954850384777085, "grad_norm": 7.77339412187411, "learning_rate": 9.873757628247516e-06, "loss": 17.9778, "step": 5446 }, { "epoch": 0.09956678304421737, "grad_norm": 6.698825218030373, "learning_rate": 9.873691522601406e-06, "loss": 17.5973, "step": 5447 }, { "epoch": 0.0995850622406639, "grad_norm": 7.93327135814933, "learning_rate": 9.873625399873426e-06, "loss": 18.0847, "step": 5448 }, { "epoch": 0.09960334143711043, "grad_norm": 7.123565048979408, "learning_rate": 9.873559260063814e-06, "loss": 17.9277, "step": 5449 }, { "epoch": 0.09962162063355695, "grad_norm": 8.034482781471866, "learning_rate": 9.8734931031728e-06, "loss": 18.2641, "step": 5450 }, { "epoch": 0.09963989983000347, "grad_norm": 7.698896824331821, "learning_rate": 9.873426929200614e-06, "loss": 18.0796, "step": 5451 }, { "epoch": 0.09965817902645, "grad_norm": 6.570824200178621, "learning_rate": 9.87336073814749e-06, "loss": 17.3793, "step": 5452 }, { "epoch": 0.09967645822289652, "grad_norm": 6.639729025706504, "learning_rate": 9.873294530013661e-06, "loss": 17.6847, "step": 5453 }, { "epoch": 0.09969473741934305, "grad_norm": 7.220849341731857, "learning_rate": 9.873228304799357e-06, "loss": 17.5644, "step": 5454 }, { "epoch": 0.09971301661578957, "grad_norm": 7.309164793387815, "learning_rate": 9.87316206250481e-06, "loss": 18.1547, "step": 5455 }, { "epoch": 0.0997312958122361, "grad_norm": 7.2867175641362145, "learning_rate": 9.873095803130252e-06, "loss": 17.7135, "step": 5456 }, { "epoch": 0.09974957500868262, "grad_norm": 8.09917890677358, "learning_rate": 9.873029526675916e-06, "loss": 17.8355, "step": 5457 }, { "epoch": 0.09976785420512914, "grad_norm": 7.5774145356789955, "learning_rate": 9.872963233142037e-06, "loss": 17.8691, "step": 5458 }, { "epoch": 0.09978613340157566, "grad_norm": 7.1023109715224, "learning_rate": 9.872896922528842e-06, "loss": 17.4237, "step": 5459 }, { "epoch": 0.0998044125980222, "grad_norm": 7.081672996540094, "learning_rate": 9.872830594836568e-06, "loss": 17.5306, "step": 5460 }, { "epoch": 0.09982269179446872, "grad_norm": 7.587101306162368, "learning_rate": 9.872764250065445e-06, "loss": 17.9003, "step": 5461 }, { "epoch": 0.09984097099091524, "grad_norm": 7.449325638916134, "learning_rate": 9.872697888215706e-06, "loss": 17.6771, "step": 5462 }, { "epoch": 0.09985925018736176, "grad_norm": 7.989129904760626, "learning_rate": 9.872631509287585e-06, "loss": 18.3158, "step": 5463 }, { "epoch": 0.09987752938380828, "grad_norm": 7.944580874655752, "learning_rate": 9.872565113281312e-06, "loss": 17.7879, "step": 5464 }, { "epoch": 0.0998958085802548, "grad_norm": 8.284103749673918, "learning_rate": 9.872498700197121e-06, "loss": 17.8936, "step": 5465 }, { "epoch": 0.09991408777670134, "grad_norm": 8.227151047533471, "learning_rate": 9.872432270035245e-06, "loss": 17.9144, "step": 5466 }, { "epoch": 0.09993236697314786, "grad_norm": 6.448364487132646, "learning_rate": 9.872365822795917e-06, "loss": 17.2946, "step": 5467 }, { "epoch": 0.09995064616959438, "grad_norm": 8.653589408219817, "learning_rate": 9.872299358479369e-06, "loss": 18.6194, "step": 5468 }, { "epoch": 0.0999689253660409, "grad_norm": 8.562719262758154, "learning_rate": 9.872232877085835e-06, "loss": 18.3316, "step": 5469 }, { "epoch": 0.09998720456248743, "grad_norm": 8.422446447160278, "learning_rate": 9.872166378615547e-06, "loss": 18.5237, "step": 5470 }, { "epoch": 0.10000548375893396, "grad_norm": 7.674541089888339, "learning_rate": 9.87209986306874e-06, "loss": 17.6449, "step": 5471 }, { "epoch": 0.10002376295538049, "grad_norm": 6.9285136410043044, "learning_rate": 9.872033330445645e-06, "loss": 17.7269, "step": 5472 }, { "epoch": 0.10004204215182701, "grad_norm": 7.052208472841874, "learning_rate": 9.871966780746495e-06, "loss": 17.7994, "step": 5473 }, { "epoch": 0.10006032134827353, "grad_norm": 6.902007178782599, "learning_rate": 9.871900213971527e-06, "loss": 17.5682, "step": 5474 }, { "epoch": 0.10007860054472005, "grad_norm": 6.6627383854906626, "learning_rate": 9.871833630120968e-06, "loss": 17.4002, "step": 5475 }, { "epoch": 0.10009687974116657, "grad_norm": 7.244903023072964, "learning_rate": 9.871767029195058e-06, "loss": 17.9044, "step": 5476 }, { "epoch": 0.10011515893761311, "grad_norm": 7.31228976482119, "learning_rate": 9.871700411194025e-06, "loss": 17.7802, "step": 5477 }, { "epoch": 0.10013343813405963, "grad_norm": 7.614321893886776, "learning_rate": 9.871633776118106e-06, "loss": 17.8275, "step": 5478 }, { "epoch": 0.10015171733050615, "grad_norm": 8.593718313171816, "learning_rate": 9.871567123967533e-06, "loss": 18.3694, "step": 5479 }, { "epoch": 0.10016999652695267, "grad_norm": 7.081722185202732, "learning_rate": 9.87150045474254e-06, "loss": 17.7706, "step": 5480 }, { "epoch": 0.1001882757233992, "grad_norm": 7.434183754851058, "learning_rate": 9.87143376844336e-06, "loss": 17.7374, "step": 5481 }, { "epoch": 0.10020655491984572, "grad_norm": 8.14436549996752, "learning_rate": 9.871367065070228e-06, "loss": 18.0334, "step": 5482 }, { "epoch": 0.10022483411629225, "grad_norm": 6.6415783121783205, "learning_rate": 9.871300344623378e-06, "loss": 17.6521, "step": 5483 }, { "epoch": 0.10024311331273877, "grad_norm": 8.553573215241945, "learning_rate": 9.871233607103042e-06, "loss": 18.6418, "step": 5484 }, { "epoch": 0.1002613925091853, "grad_norm": 7.890120053246864, "learning_rate": 9.871166852509456e-06, "loss": 18.1467, "step": 5485 }, { "epoch": 0.10027967170563182, "grad_norm": 6.657974996457115, "learning_rate": 9.87110008084285e-06, "loss": 17.6226, "step": 5486 }, { "epoch": 0.10029795090207834, "grad_norm": 8.216213128635172, "learning_rate": 9.871033292103462e-06, "loss": 18.5853, "step": 5487 }, { "epoch": 0.10031623009852488, "grad_norm": 6.911988755346808, "learning_rate": 9.870966486291527e-06, "loss": 17.5637, "step": 5488 }, { "epoch": 0.1003345092949714, "grad_norm": 8.964046214265261, "learning_rate": 9.870899663407276e-06, "loss": 18.7191, "step": 5489 }, { "epoch": 0.10035278849141792, "grad_norm": 6.00516386692326, "learning_rate": 9.870832823450945e-06, "loss": 17.3014, "step": 5490 }, { "epoch": 0.10037106768786444, "grad_norm": 7.277179726087112, "learning_rate": 9.870765966422766e-06, "loss": 17.6824, "step": 5491 }, { "epoch": 0.10038934688431096, "grad_norm": 7.756126483330481, "learning_rate": 9.870699092322977e-06, "loss": 18.0805, "step": 5492 }, { "epoch": 0.10040762608075748, "grad_norm": 8.112557487227987, "learning_rate": 9.870632201151808e-06, "loss": 17.9869, "step": 5493 }, { "epoch": 0.10042590527720402, "grad_norm": 6.362690345339067, "learning_rate": 9.870565292909498e-06, "loss": 17.3655, "step": 5494 }, { "epoch": 0.10044418447365054, "grad_norm": 6.883410614087181, "learning_rate": 9.870498367596278e-06, "loss": 17.4808, "step": 5495 }, { "epoch": 0.10046246367009706, "grad_norm": 7.798277815013971, "learning_rate": 9.870431425212384e-06, "loss": 17.8913, "step": 5496 }, { "epoch": 0.10048074286654358, "grad_norm": 7.343465589360618, "learning_rate": 9.870364465758052e-06, "loss": 17.8053, "step": 5497 }, { "epoch": 0.1004990220629901, "grad_norm": 7.755293902232316, "learning_rate": 9.870297489233512e-06, "loss": 18.117, "step": 5498 }, { "epoch": 0.10051730125943663, "grad_norm": 6.895189993995462, "learning_rate": 9.870230495639004e-06, "loss": 17.591, "step": 5499 }, { "epoch": 0.10053558045588316, "grad_norm": 9.636986893842327, "learning_rate": 9.870163484974761e-06, "loss": 17.9947, "step": 5500 }, { "epoch": 0.10055385965232969, "grad_norm": 7.017073909550062, "learning_rate": 9.870096457241016e-06, "loss": 17.8571, "step": 5501 }, { "epoch": 0.10057213884877621, "grad_norm": 7.872760716791785, "learning_rate": 9.870029412438007e-06, "loss": 18.1904, "step": 5502 }, { "epoch": 0.10059041804522273, "grad_norm": 6.872200858062733, "learning_rate": 9.869962350565967e-06, "loss": 17.6585, "step": 5503 }, { "epoch": 0.10060869724166925, "grad_norm": 7.059145424434454, "learning_rate": 9.869895271625131e-06, "loss": 17.6791, "step": 5504 }, { "epoch": 0.10062697643811579, "grad_norm": 6.207781969423849, "learning_rate": 9.869828175615737e-06, "loss": 17.2415, "step": 5505 }, { "epoch": 0.10064525563456231, "grad_norm": 5.979674323002116, "learning_rate": 9.869761062538016e-06, "loss": 17.2384, "step": 5506 }, { "epoch": 0.10066353483100883, "grad_norm": 6.240413722493483, "learning_rate": 9.869693932392205e-06, "loss": 17.3594, "step": 5507 }, { "epoch": 0.10068181402745535, "grad_norm": 7.930051945652052, "learning_rate": 9.86962678517854e-06, "loss": 18.0924, "step": 5508 }, { "epoch": 0.10070009322390187, "grad_norm": 8.069429040495264, "learning_rate": 9.869559620897255e-06, "loss": 18.4979, "step": 5509 }, { "epoch": 0.1007183724203484, "grad_norm": 6.733855373581168, "learning_rate": 9.869492439548587e-06, "loss": 17.3737, "step": 5510 }, { "epoch": 0.10073665161679493, "grad_norm": 7.640919666323922, "learning_rate": 9.86942524113277e-06, "loss": 17.77, "step": 5511 }, { "epoch": 0.10075493081324145, "grad_norm": 6.810456747217254, "learning_rate": 9.86935802565004e-06, "loss": 17.5015, "step": 5512 }, { "epoch": 0.10077321000968797, "grad_norm": 7.358783064217443, "learning_rate": 9.869290793100631e-06, "loss": 17.9874, "step": 5513 }, { "epoch": 0.1007914892061345, "grad_norm": 7.266051700607326, "learning_rate": 9.869223543484782e-06, "loss": 17.5573, "step": 5514 }, { "epoch": 0.10080976840258102, "grad_norm": 7.477396230815225, "learning_rate": 9.869156276802729e-06, "loss": 17.7798, "step": 5515 }, { "epoch": 0.10082804759902754, "grad_norm": 8.008841306883555, "learning_rate": 9.869088993054703e-06, "loss": 18.2135, "step": 5516 }, { "epoch": 0.10084632679547408, "grad_norm": 7.280361453713527, "learning_rate": 9.869021692240943e-06, "loss": 17.9519, "step": 5517 }, { "epoch": 0.1008646059919206, "grad_norm": 6.833982330880781, "learning_rate": 9.868954374361685e-06, "loss": 17.7122, "step": 5518 }, { "epoch": 0.10088288518836712, "grad_norm": 8.068587522092342, "learning_rate": 9.868887039417163e-06, "loss": 18.1906, "step": 5519 }, { "epoch": 0.10090116438481364, "grad_norm": 7.51306169463813, "learning_rate": 9.868819687407616e-06, "loss": 17.8529, "step": 5520 }, { "epoch": 0.10091944358126016, "grad_norm": 6.7116284526960435, "learning_rate": 9.868752318333279e-06, "loss": 17.6203, "step": 5521 }, { "epoch": 0.1009377227777067, "grad_norm": 7.722773093298634, "learning_rate": 9.868684932194387e-06, "loss": 18.0917, "step": 5522 }, { "epoch": 0.10095600197415322, "grad_norm": 6.92277413530129, "learning_rate": 9.868617528991177e-06, "loss": 17.6773, "step": 5523 }, { "epoch": 0.10097428117059974, "grad_norm": 7.725717045192423, "learning_rate": 9.868550108723884e-06, "loss": 18.1749, "step": 5524 }, { "epoch": 0.10099256036704626, "grad_norm": 7.57412875694219, "learning_rate": 9.868482671392747e-06, "loss": 17.7129, "step": 5525 }, { "epoch": 0.10101083956349279, "grad_norm": 8.740221519151344, "learning_rate": 9.868415216998e-06, "loss": 18.6339, "step": 5526 }, { "epoch": 0.10102911875993931, "grad_norm": 7.5562932901323165, "learning_rate": 9.86834774553988e-06, "loss": 18.1447, "step": 5527 }, { "epoch": 0.10104739795638584, "grad_norm": 6.976972762478665, "learning_rate": 9.868280257018623e-06, "loss": 17.7476, "step": 5528 }, { "epoch": 0.10106567715283236, "grad_norm": 6.569964094907622, "learning_rate": 9.868212751434467e-06, "loss": 17.5981, "step": 5529 }, { "epoch": 0.10108395634927889, "grad_norm": 7.021708305851113, "learning_rate": 9.868145228787647e-06, "loss": 17.5095, "step": 5530 }, { "epoch": 0.10110223554572541, "grad_norm": 5.597428921716891, "learning_rate": 9.8680776890784e-06, "loss": 17.2427, "step": 5531 }, { "epoch": 0.10112051474217193, "grad_norm": 6.9568685905472805, "learning_rate": 9.868010132306965e-06, "loss": 17.5931, "step": 5532 }, { "epoch": 0.10113879393861845, "grad_norm": 8.334661748358617, "learning_rate": 9.867942558473575e-06, "loss": 18.3263, "step": 5533 }, { "epoch": 0.10115707313506499, "grad_norm": 8.3366835042184, "learning_rate": 9.86787496757847e-06, "loss": 18.0709, "step": 5534 }, { "epoch": 0.10117535233151151, "grad_norm": 7.667699375805215, "learning_rate": 9.867807359621885e-06, "loss": 17.7879, "step": 5535 }, { "epoch": 0.10119363152795803, "grad_norm": 8.824124664629514, "learning_rate": 9.867739734604059e-06, "loss": 18.2866, "step": 5536 }, { "epoch": 0.10121191072440455, "grad_norm": 7.017944125814402, "learning_rate": 9.867672092525224e-06, "loss": 17.7497, "step": 5537 }, { "epoch": 0.10123018992085107, "grad_norm": 8.18842017569041, "learning_rate": 9.867604433385625e-06, "loss": 18.0759, "step": 5538 }, { "epoch": 0.10124846911729761, "grad_norm": 7.997107795168364, "learning_rate": 9.867536757185491e-06, "loss": 18.5344, "step": 5539 }, { "epoch": 0.10126674831374413, "grad_norm": 7.065617845960572, "learning_rate": 9.867469063925065e-06, "loss": 17.7069, "step": 5540 }, { "epoch": 0.10128502751019065, "grad_norm": 8.911140431793497, "learning_rate": 9.867401353604582e-06, "loss": 18.6952, "step": 5541 }, { "epoch": 0.10130330670663718, "grad_norm": 7.068744400916903, "learning_rate": 9.867333626224282e-06, "loss": 17.6487, "step": 5542 }, { "epoch": 0.1013215859030837, "grad_norm": 6.838812273696042, "learning_rate": 9.867265881784399e-06, "loss": 17.5511, "step": 5543 }, { "epoch": 0.10133986509953022, "grad_norm": 7.812591132063666, "learning_rate": 9.867198120285169e-06, "loss": 18.1763, "step": 5544 }, { "epoch": 0.10135814429597675, "grad_norm": 6.59172368394794, "learning_rate": 9.867130341726835e-06, "loss": 17.3611, "step": 5545 }, { "epoch": 0.10137642349242328, "grad_norm": 5.833388458503554, "learning_rate": 9.867062546109627e-06, "loss": 17.216, "step": 5546 }, { "epoch": 0.1013947026888698, "grad_norm": 7.554618888036021, "learning_rate": 9.866994733433792e-06, "loss": 17.9252, "step": 5547 }, { "epoch": 0.10141298188531632, "grad_norm": 6.992305674904265, "learning_rate": 9.866926903699561e-06, "loss": 17.5893, "step": 5548 }, { "epoch": 0.10143126108176284, "grad_norm": 7.343410832162275, "learning_rate": 9.866859056907171e-06, "loss": 17.8714, "step": 5549 }, { "epoch": 0.10144954027820936, "grad_norm": 6.688629495531107, "learning_rate": 9.866791193056866e-06, "loss": 17.5182, "step": 5550 }, { "epoch": 0.1014678194746559, "grad_norm": 8.989511728832127, "learning_rate": 9.86672331214888e-06, "loss": 18.3834, "step": 5551 }, { "epoch": 0.10148609867110242, "grad_norm": 7.0848265652247235, "learning_rate": 9.86665541418345e-06, "loss": 17.724, "step": 5552 }, { "epoch": 0.10150437786754894, "grad_norm": 6.465072724557792, "learning_rate": 9.866587499160813e-06, "loss": 17.5554, "step": 5553 }, { "epoch": 0.10152265706399546, "grad_norm": 8.03355687173735, "learning_rate": 9.866519567081213e-06, "loss": 18.4101, "step": 5554 }, { "epoch": 0.10154093626044199, "grad_norm": 7.843843302158278, "learning_rate": 9.866451617944881e-06, "loss": 17.962, "step": 5555 }, { "epoch": 0.10155921545688852, "grad_norm": 8.061815569234346, "learning_rate": 9.86638365175206e-06, "loss": 18.1134, "step": 5556 }, { "epoch": 0.10157749465333504, "grad_norm": 6.913228063862076, "learning_rate": 9.866315668502986e-06, "loss": 17.8261, "step": 5557 }, { "epoch": 0.10159577384978156, "grad_norm": 7.602059052770785, "learning_rate": 9.8662476681979e-06, "loss": 17.9167, "step": 5558 }, { "epoch": 0.10161405304622809, "grad_norm": 7.284534676397395, "learning_rate": 9.866179650837035e-06, "loss": 17.9744, "step": 5559 }, { "epoch": 0.10163233224267461, "grad_norm": 8.078679404599374, "learning_rate": 9.866111616420635e-06, "loss": 17.7994, "step": 5560 }, { "epoch": 0.10165061143912113, "grad_norm": 7.880967909463967, "learning_rate": 9.866043564948935e-06, "loss": 17.8808, "step": 5561 }, { "epoch": 0.10166889063556767, "grad_norm": 7.9904514906741415, "learning_rate": 9.865975496422175e-06, "loss": 18.4223, "step": 5562 }, { "epoch": 0.10168716983201419, "grad_norm": 7.984160213272381, "learning_rate": 9.865907410840592e-06, "loss": 18.4557, "step": 5563 }, { "epoch": 0.10170544902846071, "grad_norm": 7.195063687482623, "learning_rate": 9.865839308204425e-06, "loss": 17.646, "step": 5564 }, { "epoch": 0.10172372822490723, "grad_norm": 8.913339901124274, "learning_rate": 9.865771188513917e-06, "loss": 18.6782, "step": 5565 }, { "epoch": 0.10174200742135375, "grad_norm": 6.952717261414831, "learning_rate": 9.8657030517693e-06, "loss": 17.8719, "step": 5566 }, { "epoch": 0.10176028661780027, "grad_norm": 7.409917137842797, "learning_rate": 9.865634897970817e-06, "loss": 18.1012, "step": 5567 }, { "epoch": 0.10177856581424681, "grad_norm": 6.704729071553811, "learning_rate": 9.865566727118708e-06, "loss": 17.6171, "step": 5568 }, { "epoch": 0.10179684501069333, "grad_norm": 6.115780679568604, "learning_rate": 9.865498539213207e-06, "loss": 17.1877, "step": 5569 }, { "epoch": 0.10181512420713985, "grad_norm": 7.119916928732996, "learning_rate": 9.865430334254557e-06, "loss": 17.8221, "step": 5570 }, { "epoch": 0.10183340340358638, "grad_norm": 6.134259618783842, "learning_rate": 9.865362112242995e-06, "loss": 17.0704, "step": 5571 }, { "epoch": 0.1018516826000329, "grad_norm": 7.978678414155514, "learning_rate": 9.865293873178762e-06, "loss": 18.0233, "step": 5572 }, { "epoch": 0.10186996179647943, "grad_norm": 7.215070387118926, "learning_rate": 9.865225617062096e-06, "loss": 17.801, "step": 5573 }, { "epoch": 0.10188824099292595, "grad_norm": 7.31919282192941, "learning_rate": 9.865157343893238e-06, "loss": 17.9607, "step": 5574 }, { "epoch": 0.10190652018937248, "grad_norm": 7.913734248080607, "learning_rate": 9.865089053672422e-06, "loss": 18.0854, "step": 5575 }, { "epoch": 0.101924799385819, "grad_norm": 6.924365059570353, "learning_rate": 9.865020746399894e-06, "loss": 17.7547, "step": 5576 }, { "epoch": 0.10194307858226552, "grad_norm": 8.725882960381123, "learning_rate": 9.864952422075889e-06, "loss": 18.7398, "step": 5577 }, { "epoch": 0.10196135777871204, "grad_norm": 7.353269797435892, "learning_rate": 9.864884080700648e-06, "loss": 18.0692, "step": 5578 }, { "epoch": 0.10197963697515858, "grad_norm": 6.545132518592989, "learning_rate": 9.86481572227441e-06, "loss": 17.5665, "step": 5579 }, { "epoch": 0.1019979161716051, "grad_norm": 7.193654720088067, "learning_rate": 9.864747346797416e-06, "loss": 17.8518, "step": 5580 }, { "epoch": 0.10201619536805162, "grad_norm": 6.715525651364767, "learning_rate": 9.864678954269904e-06, "loss": 17.4871, "step": 5581 }, { "epoch": 0.10203447456449814, "grad_norm": 6.364410358375826, "learning_rate": 9.864610544692115e-06, "loss": 17.2479, "step": 5582 }, { "epoch": 0.10205275376094466, "grad_norm": 7.787402952253585, "learning_rate": 9.864542118064289e-06, "loss": 18.2042, "step": 5583 }, { "epoch": 0.10207103295739119, "grad_norm": 8.656929520314254, "learning_rate": 9.864473674386663e-06, "loss": 18.2817, "step": 5584 }, { "epoch": 0.10208931215383772, "grad_norm": 7.1349927267277815, "learning_rate": 9.86440521365948e-06, "loss": 17.9565, "step": 5585 }, { "epoch": 0.10210759135028424, "grad_norm": 8.451405774295017, "learning_rate": 9.86433673588298e-06, "loss": 18.2044, "step": 5586 }, { "epoch": 0.10212587054673077, "grad_norm": 6.227900189800435, "learning_rate": 9.8642682410574e-06, "loss": 17.0415, "step": 5587 }, { "epoch": 0.10214414974317729, "grad_norm": 7.342311981082835, "learning_rate": 9.864199729182983e-06, "loss": 17.8797, "step": 5588 }, { "epoch": 0.10216242893962381, "grad_norm": 7.0039651354517805, "learning_rate": 9.864131200259967e-06, "loss": 17.7251, "step": 5589 }, { "epoch": 0.10218070813607034, "grad_norm": 6.62975218233901, "learning_rate": 9.864062654288595e-06, "loss": 17.4632, "step": 5590 }, { "epoch": 0.10219898733251687, "grad_norm": 7.889146122312645, "learning_rate": 9.863994091269104e-06, "loss": 18.0917, "step": 5591 }, { "epoch": 0.10221726652896339, "grad_norm": 7.2736825293141285, "learning_rate": 9.863925511201737e-06, "loss": 17.9487, "step": 5592 }, { "epoch": 0.10223554572540991, "grad_norm": 6.440638029377337, "learning_rate": 9.863856914086732e-06, "loss": 17.2682, "step": 5593 }, { "epoch": 0.10225382492185643, "grad_norm": 7.658628036651079, "learning_rate": 9.86378829992433e-06, "loss": 17.8727, "step": 5594 }, { "epoch": 0.10227210411830295, "grad_norm": 6.086356623607372, "learning_rate": 9.863719668714774e-06, "loss": 17.2037, "step": 5595 }, { "epoch": 0.10229038331474949, "grad_norm": 6.786964768966258, "learning_rate": 9.8636510204583e-06, "loss": 17.5606, "step": 5596 }, { "epoch": 0.10230866251119601, "grad_norm": 7.003310323853189, "learning_rate": 9.863582355155154e-06, "loss": 17.6939, "step": 5597 }, { "epoch": 0.10232694170764253, "grad_norm": 7.884423014166429, "learning_rate": 9.863513672805572e-06, "loss": 18.2019, "step": 5598 }, { "epoch": 0.10234522090408905, "grad_norm": 7.115546747698863, "learning_rate": 9.863444973409797e-06, "loss": 17.6195, "step": 5599 }, { "epoch": 0.10236350010053558, "grad_norm": 7.908531301386409, "learning_rate": 9.86337625696807e-06, "loss": 17.6112, "step": 5600 }, { "epoch": 0.1023817792969821, "grad_norm": 9.359206717718962, "learning_rate": 9.86330752348063e-06, "loss": 18.813, "step": 5601 }, { "epoch": 0.10240005849342863, "grad_norm": 7.512060769664707, "learning_rate": 9.86323877294772e-06, "loss": 18.0456, "step": 5602 }, { "epoch": 0.10241833768987516, "grad_norm": 7.484972278754374, "learning_rate": 9.863170005369581e-06, "loss": 17.7385, "step": 5603 }, { "epoch": 0.10243661688632168, "grad_norm": 7.909413744077415, "learning_rate": 9.863101220746452e-06, "loss": 18.1526, "step": 5604 }, { "epoch": 0.1024548960827682, "grad_norm": 6.501674884686735, "learning_rate": 9.863032419078576e-06, "loss": 17.4334, "step": 5605 }, { "epoch": 0.10247317527921472, "grad_norm": 7.725305001947346, "learning_rate": 9.862963600366193e-06, "loss": 17.8977, "step": 5606 }, { "epoch": 0.10249145447566126, "grad_norm": 7.262891534764362, "learning_rate": 9.862894764609545e-06, "loss": 17.6885, "step": 5607 }, { "epoch": 0.10250973367210778, "grad_norm": 5.806943049019033, "learning_rate": 9.862825911808872e-06, "loss": 17.182, "step": 5608 }, { "epoch": 0.1025280128685543, "grad_norm": 6.841754023206308, "learning_rate": 9.862757041964417e-06, "loss": 17.8432, "step": 5609 }, { "epoch": 0.10254629206500082, "grad_norm": 8.892583362257891, "learning_rate": 9.862688155076418e-06, "loss": 18.766, "step": 5610 }, { "epoch": 0.10256457126144734, "grad_norm": 6.444854193252805, "learning_rate": 9.862619251145123e-06, "loss": 17.5268, "step": 5611 }, { "epoch": 0.10258285045789386, "grad_norm": 7.675333679960771, "learning_rate": 9.862550330170767e-06, "loss": 17.7674, "step": 5612 }, { "epoch": 0.1026011296543404, "grad_norm": 6.055736683653771, "learning_rate": 9.862481392153595e-06, "loss": 17.2301, "step": 5613 }, { "epoch": 0.10261940885078692, "grad_norm": 7.384604145089058, "learning_rate": 9.862412437093846e-06, "loss": 17.5344, "step": 5614 }, { "epoch": 0.10263768804723344, "grad_norm": 7.949191918095978, "learning_rate": 9.862343464991765e-06, "loss": 17.8813, "step": 5615 }, { "epoch": 0.10265596724367997, "grad_norm": 8.488756267526043, "learning_rate": 9.862274475847591e-06, "loss": 18.6542, "step": 5616 }, { "epoch": 0.10267424644012649, "grad_norm": 8.487859338016493, "learning_rate": 9.862205469661567e-06, "loss": 18.5466, "step": 5617 }, { "epoch": 0.10269252563657301, "grad_norm": 7.933801626997772, "learning_rate": 9.862136446433936e-06, "loss": 18.0013, "step": 5618 }, { "epoch": 0.10271080483301955, "grad_norm": 7.866191316386422, "learning_rate": 9.862067406164939e-06, "loss": 17.9831, "step": 5619 }, { "epoch": 0.10272908402946607, "grad_norm": 6.736840911650169, "learning_rate": 9.861998348854815e-06, "loss": 17.8773, "step": 5620 }, { "epoch": 0.10274736322591259, "grad_norm": 8.099231571762177, "learning_rate": 9.861929274503812e-06, "loss": 18.3194, "step": 5621 }, { "epoch": 0.10276564242235911, "grad_norm": 8.313637183638072, "learning_rate": 9.861860183112167e-06, "loss": 18.4127, "step": 5622 }, { "epoch": 0.10278392161880563, "grad_norm": 8.375544465998125, "learning_rate": 9.861791074680123e-06, "loss": 18.2892, "step": 5623 }, { "epoch": 0.10280220081525217, "grad_norm": 7.308802441418126, "learning_rate": 9.861721949207924e-06, "loss": 17.8897, "step": 5624 }, { "epoch": 0.10282048001169869, "grad_norm": 7.4435089226258935, "learning_rate": 9.861652806695811e-06, "loss": 18.1091, "step": 5625 }, { "epoch": 0.10283875920814521, "grad_norm": 7.672842510905069, "learning_rate": 9.861583647144028e-06, "loss": 18.1591, "step": 5626 }, { "epoch": 0.10285703840459173, "grad_norm": 6.804388092466486, "learning_rate": 9.861514470552817e-06, "loss": 17.5523, "step": 5627 }, { "epoch": 0.10287531760103825, "grad_norm": 7.947931883356575, "learning_rate": 9.861445276922416e-06, "loss": 17.6266, "step": 5628 }, { "epoch": 0.10289359679748478, "grad_norm": 6.9495225887857135, "learning_rate": 9.861376066253075e-06, "loss": 17.6151, "step": 5629 }, { "epoch": 0.10291187599393131, "grad_norm": 7.0574007178605696, "learning_rate": 9.861306838545031e-06, "loss": 17.5255, "step": 5630 }, { "epoch": 0.10293015519037783, "grad_norm": 7.42909000481963, "learning_rate": 9.86123759379853e-06, "loss": 18.3477, "step": 5631 }, { "epoch": 0.10294843438682436, "grad_norm": 7.400343424466621, "learning_rate": 9.86116833201381e-06, "loss": 17.5692, "step": 5632 }, { "epoch": 0.10296671358327088, "grad_norm": 6.656659719531691, "learning_rate": 9.86109905319112e-06, "loss": 17.5363, "step": 5633 }, { "epoch": 0.1029849927797174, "grad_norm": 8.869011790536229, "learning_rate": 9.861029757330696e-06, "loss": 18.233, "step": 5634 }, { "epoch": 0.10300327197616392, "grad_norm": 10.78138637955777, "learning_rate": 9.860960444432788e-06, "loss": 19.547, "step": 5635 }, { "epoch": 0.10302155117261046, "grad_norm": 8.69105743680655, "learning_rate": 9.860891114497632e-06, "loss": 18.1938, "step": 5636 }, { "epoch": 0.10303983036905698, "grad_norm": 7.406666591015115, "learning_rate": 9.860821767525478e-06, "loss": 17.8067, "step": 5637 }, { "epoch": 0.1030581095655035, "grad_norm": 7.236512474195321, "learning_rate": 9.860752403516565e-06, "loss": 17.8224, "step": 5638 }, { "epoch": 0.10307638876195002, "grad_norm": 6.740368023501267, "learning_rate": 9.860683022471134e-06, "loss": 17.7572, "step": 5639 }, { "epoch": 0.10309466795839654, "grad_norm": 6.801684355389477, "learning_rate": 9.860613624389433e-06, "loss": 17.1892, "step": 5640 }, { "epoch": 0.10311294715484308, "grad_norm": 6.728856808681663, "learning_rate": 9.860544209271702e-06, "loss": 17.2071, "step": 5641 }, { "epoch": 0.1031312263512896, "grad_norm": 6.5262368131426785, "learning_rate": 9.860474777118186e-06, "loss": 17.3267, "step": 5642 }, { "epoch": 0.10314950554773612, "grad_norm": 6.389057697504511, "learning_rate": 9.860405327929127e-06, "loss": 17.3653, "step": 5643 }, { "epoch": 0.10316778474418264, "grad_norm": 8.191552019361835, "learning_rate": 9.860335861704771e-06, "loss": 18.0956, "step": 5644 }, { "epoch": 0.10318606394062917, "grad_norm": 6.7991328360954, "learning_rate": 9.860266378445357e-06, "loss": 17.534, "step": 5645 }, { "epoch": 0.10320434313707569, "grad_norm": 7.887544108450004, "learning_rate": 9.860196878151132e-06, "loss": 17.6669, "step": 5646 }, { "epoch": 0.10322262233352222, "grad_norm": 7.62854995909802, "learning_rate": 9.86012736082234e-06, "loss": 18.1849, "step": 5647 }, { "epoch": 0.10324090152996875, "grad_norm": 8.87544728712017, "learning_rate": 9.860057826459221e-06, "loss": 18.2109, "step": 5648 }, { "epoch": 0.10325918072641527, "grad_norm": 7.432743426683649, "learning_rate": 9.859988275062023e-06, "loss": 18.0735, "step": 5649 }, { "epoch": 0.10327745992286179, "grad_norm": 7.19049960693097, "learning_rate": 9.859918706630988e-06, "loss": 17.8247, "step": 5650 }, { "epoch": 0.10329573911930831, "grad_norm": 8.581469903701562, "learning_rate": 9.859849121166358e-06, "loss": 18.3868, "step": 5651 }, { "epoch": 0.10331401831575483, "grad_norm": 7.167860600574844, "learning_rate": 9.85977951866838e-06, "loss": 17.7402, "step": 5652 }, { "epoch": 0.10333229751220137, "grad_norm": 7.317461419644228, "learning_rate": 9.859709899137296e-06, "loss": 17.716, "step": 5653 }, { "epoch": 0.10335057670864789, "grad_norm": 8.126845056444337, "learning_rate": 9.85964026257335e-06, "loss": 17.937, "step": 5654 }, { "epoch": 0.10336885590509441, "grad_norm": 6.775535340147825, "learning_rate": 9.859570608976788e-06, "loss": 17.7823, "step": 5655 }, { "epoch": 0.10338713510154093, "grad_norm": 8.02478382232567, "learning_rate": 9.85950093834785e-06, "loss": 18.0325, "step": 5656 }, { "epoch": 0.10340541429798746, "grad_norm": 7.84770673825979, "learning_rate": 9.859431250686786e-06, "loss": 18.0718, "step": 5657 }, { "epoch": 0.10342369349443399, "grad_norm": 7.928305293440953, "learning_rate": 9.859361545993835e-06, "loss": 18.3205, "step": 5658 }, { "epoch": 0.10344197269088051, "grad_norm": 6.308012622949614, "learning_rate": 9.859291824269244e-06, "loss": 17.5742, "step": 5659 }, { "epoch": 0.10346025188732703, "grad_norm": 6.553638426485782, "learning_rate": 9.859222085513257e-06, "loss": 17.3746, "step": 5660 }, { "epoch": 0.10347853108377356, "grad_norm": 7.8237799859017985, "learning_rate": 9.859152329726119e-06, "loss": 18.0363, "step": 5661 }, { "epoch": 0.10349681028022008, "grad_norm": 8.70988408675445, "learning_rate": 9.859082556908074e-06, "loss": 18.5131, "step": 5662 }, { "epoch": 0.1035150894766666, "grad_norm": 5.983760418825584, "learning_rate": 9.859012767059364e-06, "loss": 17.1997, "step": 5663 }, { "epoch": 0.10353336867311314, "grad_norm": 6.314545435734154, "learning_rate": 9.858942960180236e-06, "loss": 17.3162, "step": 5664 }, { "epoch": 0.10355164786955966, "grad_norm": 7.783637790785526, "learning_rate": 9.858873136270936e-06, "loss": 18.0471, "step": 5665 }, { "epoch": 0.10356992706600618, "grad_norm": 8.911846521787316, "learning_rate": 9.858803295331708e-06, "loss": 18.5961, "step": 5666 }, { "epoch": 0.1035882062624527, "grad_norm": 7.104290413771939, "learning_rate": 9.858733437362794e-06, "loss": 17.9537, "step": 5667 }, { "epoch": 0.10360648545889922, "grad_norm": 7.441246196882701, "learning_rate": 9.858663562364442e-06, "loss": 17.6768, "step": 5668 }, { "epoch": 0.10362476465534574, "grad_norm": 6.230320373537345, "learning_rate": 9.858593670336896e-06, "loss": 17.5156, "step": 5669 }, { "epoch": 0.10364304385179228, "grad_norm": 7.054016654154026, "learning_rate": 9.8585237612804e-06, "loss": 18.0473, "step": 5670 }, { "epoch": 0.1036613230482388, "grad_norm": 6.80265935611233, "learning_rate": 9.858453835195201e-06, "loss": 17.611, "step": 5671 }, { "epoch": 0.10367960224468532, "grad_norm": 8.505054168058228, "learning_rate": 9.858383892081543e-06, "loss": 18.0694, "step": 5672 }, { "epoch": 0.10369788144113185, "grad_norm": 7.974423983390342, "learning_rate": 9.858313931939671e-06, "loss": 17.7729, "step": 5673 }, { "epoch": 0.10371616063757837, "grad_norm": 7.314855359130303, "learning_rate": 9.858243954769828e-06, "loss": 17.6772, "step": 5674 }, { "epoch": 0.1037344398340249, "grad_norm": 6.987436129085345, "learning_rate": 9.858173960572263e-06, "loss": 17.7026, "step": 5675 }, { "epoch": 0.10375271903047142, "grad_norm": 6.998286275181286, "learning_rate": 9.85810394934722e-06, "loss": 17.6099, "step": 5676 }, { "epoch": 0.10377099822691795, "grad_norm": 7.006892086593356, "learning_rate": 9.858033921094945e-06, "loss": 17.7675, "step": 5677 }, { "epoch": 0.10378927742336447, "grad_norm": 6.879778033254961, "learning_rate": 9.85796387581568e-06, "loss": 17.5298, "step": 5678 }, { "epoch": 0.10380755661981099, "grad_norm": 6.76985971738366, "learning_rate": 9.857893813509679e-06, "loss": 17.6621, "step": 5679 }, { "epoch": 0.10382583581625751, "grad_norm": 6.72691845560067, "learning_rate": 9.857823734177176e-06, "loss": 17.5424, "step": 5680 }, { "epoch": 0.10384411501270405, "grad_norm": 8.53200920093947, "learning_rate": 9.857753637818424e-06, "loss": 18.1764, "step": 5681 }, { "epoch": 0.10386239420915057, "grad_norm": 7.741867404671852, "learning_rate": 9.857683524433667e-06, "loss": 17.9676, "step": 5682 }, { "epoch": 0.10388067340559709, "grad_norm": 7.869945756666038, "learning_rate": 9.857613394023151e-06, "loss": 18.0102, "step": 5683 }, { "epoch": 0.10389895260204361, "grad_norm": 8.34587849748405, "learning_rate": 9.857543246587123e-06, "loss": 18.0476, "step": 5684 }, { "epoch": 0.10391723179849013, "grad_norm": 7.460410896072189, "learning_rate": 9.857473082125826e-06, "loss": 17.6671, "step": 5685 }, { "epoch": 0.10393551099493666, "grad_norm": 5.469985484096757, "learning_rate": 9.857402900639506e-06, "loss": 17.071, "step": 5686 }, { "epoch": 0.10395379019138319, "grad_norm": 7.54252787671895, "learning_rate": 9.857332702128413e-06, "loss": 18.142, "step": 5687 }, { "epoch": 0.10397206938782971, "grad_norm": 7.176446966677361, "learning_rate": 9.857262486592789e-06, "loss": 17.9131, "step": 5688 }, { "epoch": 0.10399034858427623, "grad_norm": 7.231311940389295, "learning_rate": 9.857192254032881e-06, "loss": 17.7265, "step": 5689 }, { "epoch": 0.10400862778072276, "grad_norm": 5.430374545880149, "learning_rate": 9.857122004448937e-06, "loss": 16.9641, "step": 5690 }, { "epoch": 0.10402690697716928, "grad_norm": 6.5152725917889835, "learning_rate": 9.8570517378412e-06, "loss": 17.713, "step": 5691 }, { "epoch": 0.10404518617361581, "grad_norm": 6.808305671955995, "learning_rate": 9.856981454209919e-06, "loss": 17.5756, "step": 5692 }, { "epoch": 0.10406346537006234, "grad_norm": 7.811642796693837, "learning_rate": 9.856911153555339e-06, "loss": 18.1096, "step": 5693 }, { "epoch": 0.10408174456650886, "grad_norm": 7.481230820798903, "learning_rate": 9.856840835877708e-06, "loss": 17.877, "step": 5694 }, { "epoch": 0.10410002376295538, "grad_norm": 8.124339711772812, "learning_rate": 9.85677050117727e-06, "loss": 18.1425, "step": 5695 }, { "epoch": 0.1041183029594019, "grad_norm": 7.089440559584176, "learning_rate": 9.856700149454274e-06, "loss": 18.2303, "step": 5696 }, { "epoch": 0.10413658215584842, "grad_norm": 6.9627328745022545, "learning_rate": 9.856629780708966e-06, "loss": 17.7311, "step": 5697 }, { "epoch": 0.10415486135229496, "grad_norm": 7.262370127311515, "learning_rate": 9.856559394941591e-06, "loss": 17.6369, "step": 5698 }, { "epoch": 0.10417314054874148, "grad_norm": 6.9301616574918805, "learning_rate": 9.856488992152398e-06, "loss": 17.872, "step": 5699 }, { "epoch": 0.104191419745188, "grad_norm": 8.421575270284432, "learning_rate": 9.85641857234163e-06, "loss": 18.5551, "step": 5700 }, { "epoch": 0.10420969894163452, "grad_norm": 8.354074911118072, "learning_rate": 9.85634813550954e-06, "loss": 18.44, "step": 5701 }, { "epoch": 0.10422797813808105, "grad_norm": 7.699607647268898, "learning_rate": 9.856277681656367e-06, "loss": 18.0969, "step": 5702 }, { "epoch": 0.10424625733452757, "grad_norm": 6.76819580678942, "learning_rate": 9.856207210782365e-06, "loss": 17.6372, "step": 5703 }, { "epoch": 0.1042645365309741, "grad_norm": 8.428388101099076, "learning_rate": 9.856136722887777e-06, "loss": 18.3778, "step": 5704 }, { "epoch": 0.10428281572742062, "grad_norm": 6.669753854352136, "learning_rate": 9.85606621797285e-06, "loss": 17.6416, "step": 5705 }, { "epoch": 0.10430109492386715, "grad_norm": 6.133803018299268, "learning_rate": 9.855995696037835e-06, "loss": 17.3228, "step": 5706 }, { "epoch": 0.10431937412031367, "grad_norm": 7.26969586228127, "learning_rate": 9.855925157082975e-06, "loss": 17.8272, "step": 5707 }, { "epoch": 0.10433765331676019, "grad_norm": 6.995612745175263, "learning_rate": 9.85585460110852e-06, "loss": 17.711, "step": 5708 }, { "epoch": 0.10435593251320673, "grad_norm": 6.963593528382055, "learning_rate": 9.855784028114715e-06, "loss": 17.802, "step": 5709 }, { "epoch": 0.10437421170965325, "grad_norm": 7.495000273619035, "learning_rate": 9.855713438101807e-06, "loss": 17.9456, "step": 5710 }, { "epoch": 0.10439249090609977, "grad_norm": 6.8303246224641585, "learning_rate": 9.855642831070046e-06, "loss": 17.7261, "step": 5711 }, { "epoch": 0.10441077010254629, "grad_norm": 8.105013767699267, "learning_rate": 9.85557220701968e-06, "loss": 18.0448, "step": 5712 }, { "epoch": 0.10442904929899281, "grad_norm": 7.403994990361161, "learning_rate": 9.85550156595095e-06, "loss": 17.8851, "step": 5713 }, { "epoch": 0.10444732849543933, "grad_norm": 7.770646153599739, "learning_rate": 9.855430907864113e-06, "loss": 18.2886, "step": 5714 }, { "epoch": 0.10446560769188587, "grad_norm": 7.474763922450885, "learning_rate": 9.85536023275941e-06, "loss": 17.843, "step": 5715 }, { "epoch": 0.10448388688833239, "grad_norm": 7.064507913735838, "learning_rate": 9.855289540637092e-06, "loss": 17.8636, "step": 5716 }, { "epoch": 0.10450216608477891, "grad_norm": 6.756623432684942, "learning_rate": 9.855218831497403e-06, "loss": 17.421, "step": 5717 }, { "epoch": 0.10452044528122544, "grad_norm": 7.261625310230588, "learning_rate": 9.855148105340595e-06, "loss": 17.5943, "step": 5718 }, { "epoch": 0.10453872447767196, "grad_norm": 7.1522997287521, "learning_rate": 9.855077362166914e-06, "loss": 17.6506, "step": 5719 }, { "epoch": 0.10455700367411848, "grad_norm": 7.21845105333718, "learning_rate": 9.855006601976608e-06, "loss": 17.5548, "step": 5720 }, { "epoch": 0.10457528287056501, "grad_norm": 5.992877911997206, "learning_rate": 9.854935824769926e-06, "loss": 17.5561, "step": 5721 }, { "epoch": 0.10459356206701154, "grad_norm": 8.51209455874131, "learning_rate": 9.854865030547115e-06, "loss": 18.3696, "step": 5722 }, { "epoch": 0.10461184126345806, "grad_norm": 5.996228208506489, "learning_rate": 9.854794219308424e-06, "loss": 17.2701, "step": 5723 }, { "epoch": 0.10463012045990458, "grad_norm": 6.118078868879499, "learning_rate": 9.854723391054097e-06, "loss": 17.5607, "step": 5724 }, { "epoch": 0.1046483996563511, "grad_norm": 7.117069409509465, "learning_rate": 9.854652545784388e-06, "loss": 17.538, "step": 5725 }, { "epoch": 0.10466667885279764, "grad_norm": 6.266101120638053, "learning_rate": 9.854581683499544e-06, "loss": 17.4521, "step": 5726 }, { "epoch": 0.10468495804924416, "grad_norm": 6.146426237738708, "learning_rate": 9.854510804199813e-06, "loss": 17.2019, "step": 5727 }, { "epoch": 0.10470323724569068, "grad_norm": 6.1851417483044235, "learning_rate": 9.854439907885441e-06, "loss": 17.437, "step": 5728 }, { "epoch": 0.1047215164421372, "grad_norm": 6.894948126716912, "learning_rate": 9.85436899455668e-06, "loss": 17.8609, "step": 5729 }, { "epoch": 0.10473979563858372, "grad_norm": 6.171758143473966, "learning_rate": 9.854298064213775e-06, "loss": 17.4036, "step": 5730 }, { "epoch": 0.10475807483503025, "grad_norm": 8.770487531018796, "learning_rate": 9.854227116856977e-06, "loss": 18.0638, "step": 5731 }, { "epoch": 0.10477635403147678, "grad_norm": 6.636523335196349, "learning_rate": 9.854156152486536e-06, "loss": 17.4718, "step": 5732 }, { "epoch": 0.1047946332279233, "grad_norm": 7.392082438690228, "learning_rate": 9.854085171102697e-06, "loss": 17.9873, "step": 5733 }, { "epoch": 0.10481291242436983, "grad_norm": 7.909454206722237, "learning_rate": 9.85401417270571e-06, "loss": 18.2473, "step": 5734 }, { "epoch": 0.10483119162081635, "grad_norm": 7.006888620312817, "learning_rate": 9.853943157295826e-06, "loss": 17.7185, "step": 5735 }, { "epoch": 0.10484947081726287, "grad_norm": 7.782189659230426, "learning_rate": 9.853872124873294e-06, "loss": 18.2295, "step": 5736 }, { "epoch": 0.10486775001370939, "grad_norm": 5.875710856288465, "learning_rate": 9.85380107543836e-06, "loss": 17.2333, "step": 5737 }, { "epoch": 0.10488602921015593, "grad_norm": 7.165194222622087, "learning_rate": 9.853730008991274e-06, "loss": 18.0052, "step": 5738 }, { "epoch": 0.10490430840660245, "grad_norm": 7.001827406262003, "learning_rate": 9.853658925532285e-06, "loss": 17.6866, "step": 5739 }, { "epoch": 0.10492258760304897, "grad_norm": 6.194436513322145, "learning_rate": 9.853587825061644e-06, "loss": 17.4752, "step": 5740 }, { "epoch": 0.10494086679949549, "grad_norm": 7.2999102677724155, "learning_rate": 9.853516707579598e-06, "loss": 17.6792, "step": 5741 }, { "epoch": 0.10495914599594201, "grad_norm": 11.076180042171714, "learning_rate": 9.853445573086396e-06, "loss": 18.2204, "step": 5742 }, { "epoch": 0.10497742519238855, "grad_norm": 6.769448177150094, "learning_rate": 9.853374421582291e-06, "loss": 17.4852, "step": 5743 }, { "epoch": 0.10499570438883507, "grad_norm": 7.339161168489227, "learning_rate": 9.853303253067528e-06, "loss": 17.9064, "step": 5744 }, { "epoch": 0.10501398358528159, "grad_norm": 8.842002938007269, "learning_rate": 9.853232067542358e-06, "loss": 18.4983, "step": 5745 }, { "epoch": 0.10503226278172811, "grad_norm": 7.149292050165079, "learning_rate": 9.853160865007032e-06, "loss": 17.9278, "step": 5746 }, { "epoch": 0.10505054197817464, "grad_norm": 7.63771705425234, "learning_rate": 9.853089645461798e-06, "loss": 18.2209, "step": 5747 }, { "epoch": 0.10506882117462116, "grad_norm": 7.203846304017191, "learning_rate": 9.853018408906905e-06, "loss": 17.4716, "step": 5748 }, { "epoch": 0.1050871003710677, "grad_norm": 7.158352887577222, "learning_rate": 9.852947155342606e-06, "loss": 17.6188, "step": 5749 }, { "epoch": 0.10510537956751421, "grad_norm": 8.11297657185183, "learning_rate": 9.852875884769146e-06, "loss": 18.0543, "step": 5750 }, { "epoch": 0.10512365876396074, "grad_norm": 7.727460131316621, "learning_rate": 9.852804597186777e-06, "loss": 17.9759, "step": 5751 }, { "epoch": 0.10514193796040726, "grad_norm": 7.057238262452905, "learning_rate": 9.85273329259575e-06, "loss": 17.6428, "step": 5752 }, { "epoch": 0.10516021715685378, "grad_norm": 6.714331539931519, "learning_rate": 9.852661970996314e-06, "loss": 17.4928, "step": 5753 }, { "epoch": 0.1051784963533003, "grad_norm": 7.166353058410686, "learning_rate": 9.852590632388719e-06, "loss": 17.9264, "step": 5754 }, { "epoch": 0.10519677554974684, "grad_norm": 7.137643891664648, "learning_rate": 9.852519276773214e-06, "loss": 17.9394, "step": 5755 }, { "epoch": 0.10521505474619336, "grad_norm": 7.616904490886513, "learning_rate": 9.85244790415005e-06, "loss": 18.1653, "step": 5756 }, { "epoch": 0.10523333394263988, "grad_norm": 7.455770390212299, "learning_rate": 9.852376514519478e-06, "loss": 18.1438, "step": 5757 }, { "epoch": 0.1052516131390864, "grad_norm": 9.109545469929985, "learning_rate": 9.852305107881747e-06, "loss": 18.4404, "step": 5758 }, { "epoch": 0.10526989233553292, "grad_norm": 7.181036814075525, "learning_rate": 9.852233684237107e-06, "loss": 17.8544, "step": 5759 }, { "epoch": 0.10528817153197946, "grad_norm": 6.999738257101895, "learning_rate": 9.85216224358581e-06, "loss": 17.8273, "step": 5760 }, { "epoch": 0.10530645072842598, "grad_norm": 8.094214736012356, "learning_rate": 9.852090785928105e-06, "loss": 18.148, "step": 5761 }, { "epoch": 0.1053247299248725, "grad_norm": 7.561774993680934, "learning_rate": 9.852019311264242e-06, "loss": 18.1062, "step": 5762 }, { "epoch": 0.10534300912131903, "grad_norm": 6.070270020687608, "learning_rate": 9.851947819594474e-06, "loss": 17.2214, "step": 5763 }, { "epoch": 0.10536128831776555, "grad_norm": 6.835985532335688, "learning_rate": 9.85187631091905e-06, "loss": 17.7722, "step": 5764 }, { "epoch": 0.10537956751421207, "grad_norm": 7.871504429435472, "learning_rate": 9.85180478523822e-06, "loss": 18.1517, "step": 5765 }, { "epoch": 0.1053978467106586, "grad_norm": 7.179465741023101, "learning_rate": 9.851733242552234e-06, "loss": 17.8843, "step": 5766 }, { "epoch": 0.10541612590710513, "grad_norm": 8.111602194577884, "learning_rate": 9.851661682861346e-06, "loss": 18.4305, "step": 5767 }, { "epoch": 0.10543440510355165, "grad_norm": 7.511389701700761, "learning_rate": 9.851590106165802e-06, "loss": 18.0181, "step": 5768 }, { "epoch": 0.10545268429999817, "grad_norm": 7.887199715998375, "learning_rate": 9.851518512465858e-06, "loss": 17.847, "step": 5769 }, { "epoch": 0.10547096349644469, "grad_norm": 7.949776827589897, "learning_rate": 9.851446901761763e-06, "loss": 18.3067, "step": 5770 }, { "epoch": 0.10548924269289121, "grad_norm": 7.935419213525369, "learning_rate": 9.851375274053766e-06, "loss": 18.0919, "step": 5771 }, { "epoch": 0.10550752188933775, "grad_norm": 8.700440051234581, "learning_rate": 9.851303629342122e-06, "loss": 18.42, "step": 5772 }, { "epoch": 0.10552580108578427, "grad_norm": 7.515220487706339, "learning_rate": 9.851231967627077e-06, "loss": 17.6783, "step": 5773 }, { "epoch": 0.10554408028223079, "grad_norm": 7.473438351789878, "learning_rate": 9.851160288908885e-06, "loss": 17.8782, "step": 5774 }, { "epoch": 0.10556235947867731, "grad_norm": 7.902432249555323, "learning_rate": 9.851088593187798e-06, "loss": 17.3811, "step": 5775 }, { "epoch": 0.10558063867512384, "grad_norm": 7.851866298826325, "learning_rate": 9.851016880464069e-06, "loss": 18.4099, "step": 5776 }, { "epoch": 0.10559891787157037, "grad_norm": 7.944718451043134, "learning_rate": 9.850945150737943e-06, "loss": 18.0104, "step": 5777 }, { "epoch": 0.1056171970680169, "grad_norm": 6.751090297928792, "learning_rate": 9.850873404009678e-06, "loss": 17.6698, "step": 5778 }, { "epoch": 0.10563547626446342, "grad_norm": 6.202448126840248, "learning_rate": 9.85080164027952e-06, "loss": 17.3501, "step": 5779 }, { "epoch": 0.10565375546090994, "grad_norm": 9.054764255352312, "learning_rate": 9.850729859547725e-06, "loss": 18.7583, "step": 5780 }, { "epoch": 0.10567203465735646, "grad_norm": 8.027026165249456, "learning_rate": 9.850658061814542e-06, "loss": 18.2361, "step": 5781 }, { "epoch": 0.10569031385380298, "grad_norm": 6.088920807800374, "learning_rate": 9.850586247080222e-06, "loss": 17.3435, "step": 5782 }, { "epoch": 0.10570859305024952, "grad_norm": 8.14422214983587, "learning_rate": 9.850514415345021e-06, "loss": 17.9607, "step": 5783 }, { "epoch": 0.10572687224669604, "grad_norm": 13.53775301378071, "learning_rate": 9.850442566609186e-06, "loss": 18.0239, "step": 5784 }, { "epoch": 0.10574515144314256, "grad_norm": 7.6884599728396985, "learning_rate": 9.850370700872971e-06, "loss": 18.2373, "step": 5785 }, { "epoch": 0.10576343063958908, "grad_norm": 6.910462429635305, "learning_rate": 9.85029881813663e-06, "loss": 17.3886, "step": 5786 }, { "epoch": 0.1057817098360356, "grad_norm": 6.389760222001589, "learning_rate": 9.85022691840041e-06, "loss": 17.8356, "step": 5787 }, { "epoch": 0.10579998903248213, "grad_norm": 7.11371599600536, "learning_rate": 9.850155001664565e-06, "loss": 18.0679, "step": 5788 }, { "epoch": 0.10581826822892866, "grad_norm": 8.723889814178113, "learning_rate": 9.85008306792935e-06, "loss": 17.9585, "step": 5789 }, { "epoch": 0.10583654742537518, "grad_norm": 8.407616307477696, "learning_rate": 9.850011117195013e-06, "loss": 18.2149, "step": 5790 }, { "epoch": 0.1058548266218217, "grad_norm": 7.7439217350300025, "learning_rate": 9.849939149461807e-06, "loss": 18.1134, "step": 5791 }, { "epoch": 0.10587310581826823, "grad_norm": 7.181175190486328, "learning_rate": 9.849867164729987e-06, "loss": 17.6784, "step": 5792 }, { "epoch": 0.10589138501471475, "grad_norm": 7.445632998724882, "learning_rate": 9.849795162999803e-06, "loss": 17.9333, "step": 5793 }, { "epoch": 0.10590966421116128, "grad_norm": 18.655089934948883, "learning_rate": 9.849723144271508e-06, "loss": 18.4331, "step": 5794 }, { "epoch": 0.1059279434076078, "grad_norm": 8.358368378192516, "learning_rate": 9.849651108545352e-06, "loss": 18.5816, "step": 5795 }, { "epoch": 0.10594622260405433, "grad_norm": 7.184188658641208, "learning_rate": 9.849579055821593e-06, "loss": 17.4722, "step": 5796 }, { "epoch": 0.10596450180050085, "grad_norm": 6.01263250028207, "learning_rate": 9.84950698610048e-06, "loss": 17.2748, "step": 5797 }, { "epoch": 0.10598278099694737, "grad_norm": 6.957652918901836, "learning_rate": 9.849434899382262e-06, "loss": 17.7175, "step": 5798 }, { "epoch": 0.10600106019339389, "grad_norm": 7.834313757092882, "learning_rate": 9.8493627956672e-06, "loss": 17.8026, "step": 5799 }, { "epoch": 0.10601933938984043, "grad_norm": 6.410711562250967, "learning_rate": 9.84929067495554e-06, "loss": 17.3805, "step": 5800 }, { "epoch": 0.10603761858628695, "grad_norm": 7.576663577969768, "learning_rate": 9.849218537247536e-06, "loss": 17.9673, "step": 5801 }, { "epoch": 0.10605589778273347, "grad_norm": 7.310560435954196, "learning_rate": 9.849146382543445e-06, "loss": 18.0013, "step": 5802 }, { "epoch": 0.10607417697918, "grad_norm": 9.029088680320438, "learning_rate": 9.849074210843513e-06, "loss": 18.54, "step": 5803 }, { "epoch": 0.10609245617562651, "grad_norm": 7.417258750524977, "learning_rate": 9.849002022148e-06, "loss": 18.0685, "step": 5804 }, { "epoch": 0.10611073537207304, "grad_norm": 8.217334091701655, "learning_rate": 9.848929816457153e-06, "loss": 18.4651, "step": 5805 }, { "epoch": 0.10612901456851957, "grad_norm": 8.143445762336139, "learning_rate": 9.848857593771228e-06, "loss": 18.1567, "step": 5806 }, { "epoch": 0.1061472937649661, "grad_norm": 7.917228134414341, "learning_rate": 9.84878535409048e-06, "loss": 18.3476, "step": 5807 }, { "epoch": 0.10616557296141262, "grad_norm": 7.883135615970315, "learning_rate": 9.848713097415159e-06, "loss": 18.3613, "step": 5808 }, { "epoch": 0.10618385215785914, "grad_norm": 6.170311486863312, "learning_rate": 9.848640823745518e-06, "loss": 17.4302, "step": 5809 }, { "epoch": 0.10620213135430566, "grad_norm": 7.893547079062363, "learning_rate": 9.848568533081815e-06, "loss": 18.4974, "step": 5810 }, { "epoch": 0.1062204105507522, "grad_norm": 5.988982428265839, "learning_rate": 9.848496225424298e-06, "loss": 17.2988, "step": 5811 }, { "epoch": 0.10623868974719872, "grad_norm": 7.861364822837715, "learning_rate": 9.84842390077322e-06, "loss": 18.2354, "step": 5812 }, { "epoch": 0.10625696894364524, "grad_norm": 6.942108917709645, "learning_rate": 9.848351559128842e-06, "loss": 17.7698, "step": 5813 }, { "epoch": 0.10627524814009176, "grad_norm": 6.922394060836148, "learning_rate": 9.848279200491409e-06, "loss": 17.5157, "step": 5814 }, { "epoch": 0.10629352733653828, "grad_norm": 6.213133657148186, "learning_rate": 9.848206824861179e-06, "loss": 17.3671, "step": 5815 }, { "epoch": 0.1063118065329848, "grad_norm": 7.1841358065442265, "learning_rate": 9.848134432238404e-06, "loss": 17.5998, "step": 5816 }, { "epoch": 0.10633008572943134, "grad_norm": 6.10260872789878, "learning_rate": 9.84806202262334e-06, "loss": 17.3957, "step": 5817 }, { "epoch": 0.10634836492587786, "grad_norm": 8.596100792519103, "learning_rate": 9.847989596016239e-06, "loss": 18.6582, "step": 5818 }, { "epoch": 0.10636664412232438, "grad_norm": 7.174812522740201, "learning_rate": 9.847917152417354e-06, "loss": 18.0218, "step": 5819 }, { "epoch": 0.1063849233187709, "grad_norm": 7.463897320160827, "learning_rate": 9.847844691826941e-06, "loss": 17.8591, "step": 5820 }, { "epoch": 0.10640320251521743, "grad_norm": 6.616699430849265, "learning_rate": 9.847772214245252e-06, "loss": 17.6413, "step": 5821 }, { "epoch": 0.10642148171166395, "grad_norm": 7.484435459009545, "learning_rate": 9.847699719672543e-06, "loss": 17.8048, "step": 5822 }, { "epoch": 0.10643976090811048, "grad_norm": 7.785509005815104, "learning_rate": 9.847627208109067e-06, "loss": 18.2513, "step": 5823 }, { "epoch": 0.106458040104557, "grad_norm": 7.029673768842909, "learning_rate": 9.847554679555078e-06, "loss": 17.6034, "step": 5824 }, { "epoch": 0.10647631930100353, "grad_norm": 7.060688571159415, "learning_rate": 9.847482134010833e-06, "loss": 17.9917, "step": 5825 }, { "epoch": 0.10649459849745005, "grad_norm": 7.408573515883231, "learning_rate": 9.84740957147658e-06, "loss": 18.0168, "step": 5826 }, { "epoch": 0.10651287769389657, "grad_norm": 6.007931771023013, "learning_rate": 9.84733699195258e-06, "loss": 17.0875, "step": 5827 }, { "epoch": 0.1065311568903431, "grad_norm": 6.920791435860862, "learning_rate": 9.847264395439083e-06, "loss": 17.8657, "step": 5828 }, { "epoch": 0.10654943608678963, "grad_norm": 7.043542400746885, "learning_rate": 9.847191781936344e-06, "loss": 17.6116, "step": 5829 }, { "epoch": 0.10656771528323615, "grad_norm": 8.035065072369735, "learning_rate": 9.84711915144462e-06, "loss": 17.736, "step": 5830 }, { "epoch": 0.10658599447968267, "grad_norm": 6.635212426764212, "learning_rate": 9.847046503964165e-06, "loss": 17.4266, "step": 5831 }, { "epoch": 0.1066042736761292, "grad_norm": 7.135348780517533, "learning_rate": 9.84697383949523e-06, "loss": 17.7999, "step": 5832 }, { "epoch": 0.10662255287257572, "grad_norm": 7.415046581608795, "learning_rate": 9.846901158038074e-06, "loss": 17.75, "step": 5833 }, { "epoch": 0.10664083206902225, "grad_norm": 7.4214861048291, "learning_rate": 9.846828459592949e-06, "loss": 17.939, "step": 5834 }, { "epoch": 0.10665911126546877, "grad_norm": 6.618460113089114, "learning_rate": 9.84675574416011e-06, "loss": 17.8388, "step": 5835 }, { "epoch": 0.1066773904619153, "grad_norm": 7.193582755325592, "learning_rate": 9.846683011739814e-06, "loss": 17.7923, "step": 5836 }, { "epoch": 0.10669566965836182, "grad_norm": 6.276348611044246, "learning_rate": 9.846610262332316e-06, "loss": 17.6146, "step": 5837 }, { "epoch": 0.10671394885480834, "grad_norm": 6.945116714956209, "learning_rate": 9.846537495937868e-06, "loss": 17.5195, "step": 5838 }, { "epoch": 0.10673222805125486, "grad_norm": 7.042928447394009, "learning_rate": 9.846464712556727e-06, "loss": 17.6989, "step": 5839 }, { "epoch": 0.1067505072477014, "grad_norm": 8.330215381609719, "learning_rate": 9.846391912189147e-06, "loss": 18.2885, "step": 5840 }, { "epoch": 0.10676878644414792, "grad_norm": 8.290067979149216, "learning_rate": 9.846319094835385e-06, "loss": 17.9417, "step": 5841 }, { "epoch": 0.10678706564059444, "grad_norm": 7.494712764272132, "learning_rate": 9.846246260495694e-06, "loss": 17.9946, "step": 5842 }, { "epoch": 0.10680534483704096, "grad_norm": 7.461643328013089, "learning_rate": 9.846173409170333e-06, "loss": 17.9484, "step": 5843 }, { "epoch": 0.10682362403348748, "grad_norm": 6.459782126537119, "learning_rate": 9.846100540859552e-06, "loss": 17.4873, "step": 5844 }, { "epoch": 0.10684190322993402, "grad_norm": 7.096034223286772, "learning_rate": 9.846027655563608e-06, "loss": 17.851, "step": 5845 }, { "epoch": 0.10686018242638054, "grad_norm": 6.840368061161655, "learning_rate": 9.84595475328276e-06, "loss": 17.6252, "step": 5846 }, { "epoch": 0.10687846162282706, "grad_norm": 8.080219112226988, "learning_rate": 9.84588183401726e-06, "loss": 18.2538, "step": 5847 }, { "epoch": 0.10689674081927358, "grad_norm": 7.212462871658821, "learning_rate": 9.845808897767366e-06, "loss": 17.5176, "step": 5848 }, { "epoch": 0.1069150200157201, "grad_norm": 6.984673547302289, "learning_rate": 9.845735944533333e-06, "loss": 17.6274, "step": 5849 }, { "epoch": 0.10693329921216663, "grad_norm": 7.583678677218762, "learning_rate": 9.845662974315413e-06, "loss": 17.9241, "step": 5850 }, { "epoch": 0.10695157840861316, "grad_norm": 6.838934157794366, "learning_rate": 9.845589987113866e-06, "loss": 17.6212, "step": 5851 }, { "epoch": 0.10696985760505968, "grad_norm": 8.029517620405162, "learning_rate": 9.845516982928948e-06, "loss": 17.9396, "step": 5852 }, { "epoch": 0.1069881368015062, "grad_norm": 7.383262938437746, "learning_rate": 9.845443961760912e-06, "loss": 17.9403, "step": 5853 }, { "epoch": 0.10700641599795273, "grad_norm": 7.23154303085679, "learning_rate": 9.845370923610016e-06, "loss": 18.1187, "step": 5854 }, { "epoch": 0.10702469519439925, "grad_norm": 9.145868149614028, "learning_rate": 9.845297868476515e-06, "loss": 18.387, "step": 5855 }, { "epoch": 0.10704297439084577, "grad_norm": 8.650895626845806, "learning_rate": 9.845224796360666e-06, "loss": 18.1168, "step": 5856 }, { "epoch": 0.10706125358729231, "grad_norm": 6.243190581430269, "learning_rate": 9.845151707262724e-06, "loss": 17.367, "step": 5857 }, { "epoch": 0.10707953278373883, "grad_norm": 8.252484291615621, "learning_rate": 9.845078601182945e-06, "loss": 18.4255, "step": 5858 }, { "epoch": 0.10709781198018535, "grad_norm": 6.875617965557466, "learning_rate": 9.845005478121588e-06, "loss": 17.7237, "step": 5859 }, { "epoch": 0.10711609117663187, "grad_norm": 11.891019983136049, "learning_rate": 9.844932338078905e-06, "loss": 18.2797, "step": 5860 }, { "epoch": 0.1071343703730784, "grad_norm": 8.115981877473052, "learning_rate": 9.844859181055155e-06, "loss": 18.3821, "step": 5861 }, { "epoch": 0.10715264956952493, "grad_norm": 8.74529679768113, "learning_rate": 9.844786007050595e-06, "loss": 18.3951, "step": 5862 }, { "epoch": 0.10717092876597145, "grad_norm": 7.823005414652111, "learning_rate": 9.84471281606548e-06, "loss": 17.94, "step": 5863 }, { "epoch": 0.10718920796241797, "grad_norm": 8.660947518752678, "learning_rate": 9.844639608100066e-06, "loss": 18.5589, "step": 5864 }, { "epoch": 0.1072074871588645, "grad_norm": 7.86980803121859, "learning_rate": 9.844566383154613e-06, "loss": 18.2445, "step": 5865 }, { "epoch": 0.10722576635531102, "grad_norm": 7.739345685288175, "learning_rate": 9.844493141229374e-06, "loss": 17.8264, "step": 5866 }, { "epoch": 0.10724404555175754, "grad_norm": 7.16751922221443, "learning_rate": 9.844419882324606e-06, "loss": 17.4311, "step": 5867 }, { "epoch": 0.10726232474820407, "grad_norm": 6.227577904831335, "learning_rate": 9.844346606440566e-06, "loss": 17.5006, "step": 5868 }, { "epoch": 0.1072806039446506, "grad_norm": 8.450923211490991, "learning_rate": 9.844273313577516e-06, "loss": 18.5062, "step": 5869 }, { "epoch": 0.10729888314109712, "grad_norm": 6.767150093959317, "learning_rate": 9.844200003735703e-06, "loss": 17.5192, "step": 5870 }, { "epoch": 0.10731716233754364, "grad_norm": 7.859555479293641, "learning_rate": 9.844126676915393e-06, "loss": 17.8175, "step": 5871 }, { "epoch": 0.10733544153399016, "grad_norm": 7.985006211089855, "learning_rate": 9.844053333116836e-06, "loss": 18.0713, "step": 5872 }, { "epoch": 0.1073537207304367, "grad_norm": 8.03526743666188, "learning_rate": 9.843979972340295e-06, "loss": 17.9783, "step": 5873 }, { "epoch": 0.10737199992688322, "grad_norm": 7.193838453325093, "learning_rate": 9.843906594586025e-06, "loss": 17.7052, "step": 5874 }, { "epoch": 0.10739027912332974, "grad_norm": 7.0876536261099305, "learning_rate": 9.84383319985428e-06, "loss": 17.8391, "step": 5875 }, { "epoch": 0.10740855831977626, "grad_norm": 8.60170433420796, "learning_rate": 9.843759788145323e-06, "loss": 18.1661, "step": 5876 }, { "epoch": 0.10742683751622278, "grad_norm": 8.318778188731175, "learning_rate": 9.843686359459406e-06, "loss": 18.1817, "step": 5877 }, { "epoch": 0.1074451167126693, "grad_norm": 6.712672907800277, "learning_rate": 9.84361291379679e-06, "loss": 17.5527, "step": 5878 }, { "epoch": 0.10746339590911584, "grad_norm": 8.150534231108727, "learning_rate": 9.84353945115773e-06, "loss": 18.1581, "step": 5879 }, { "epoch": 0.10748167510556236, "grad_norm": 6.359388740830054, "learning_rate": 9.843465971542485e-06, "loss": 17.3468, "step": 5880 }, { "epoch": 0.10749995430200888, "grad_norm": 6.293291538227234, "learning_rate": 9.843392474951312e-06, "loss": 17.4126, "step": 5881 }, { "epoch": 0.1075182334984554, "grad_norm": 7.168148977752168, "learning_rate": 9.843318961384469e-06, "loss": 17.7767, "step": 5882 }, { "epoch": 0.10753651269490193, "grad_norm": 7.088881635224715, "learning_rate": 9.843245430842215e-06, "loss": 17.7949, "step": 5883 }, { "epoch": 0.10755479189134845, "grad_norm": 6.50650777081013, "learning_rate": 9.843171883324802e-06, "loss": 17.6451, "step": 5884 }, { "epoch": 0.10757307108779499, "grad_norm": 6.569283480801569, "learning_rate": 9.843098318832495e-06, "loss": 17.816, "step": 5885 }, { "epoch": 0.10759135028424151, "grad_norm": 7.836204033462986, "learning_rate": 9.843024737365548e-06, "loss": 17.3799, "step": 5886 }, { "epoch": 0.10760962948068803, "grad_norm": 8.043309457711423, "learning_rate": 9.84295113892422e-06, "loss": 18.2676, "step": 5887 }, { "epoch": 0.10762790867713455, "grad_norm": 7.7764713348071695, "learning_rate": 9.842877523508766e-06, "loss": 18.2114, "step": 5888 }, { "epoch": 0.10764618787358107, "grad_norm": 8.185116511570026, "learning_rate": 9.842803891119448e-06, "loss": 17.9973, "step": 5889 }, { "epoch": 0.10766446707002761, "grad_norm": 7.8794932825997295, "learning_rate": 9.842730241756524e-06, "loss": 18.078, "step": 5890 }, { "epoch": 0.10768274626647413, "grad_norm": 6.539034919480205, "learning_rate": 9.842656575420248e-06, "loss": 17.4952, "step": 5891 }, { "epoch": 0.10770102546292065, "grad_norm": 6.66230941878411, "learning_rate": 9.842582892110884e-06, "loss": 17.3589, "step": 5892 }, { "epoch": 0.10771930465936717, "grad_norm": 7.067289793977617, "learning_rate": 9.842509191828686e-06, "loss": 17.7478, "step": 5893 }, { "epoch": 0.1077375838558137, "grad_norm": 7.951195424808459, "learning_rate": 9.842435474573912e-06, "loss": 17.9751, "step": 5894 }, { "epoch": 0.10775586305226022, "grad_norm": 6.353737087620511, "learning_rate": 9.842361740346824e-06, "loss": 17.5808, "step": 5895 }, { "epoch": 0.10777414224870675, "grad_norm": 6.010220626394896, "learning_rate": 9.842287989147676e-06, "loss": 17.3642, "step": 5896 }, { "epoch": 0.10779242144515327, "grad_norm": 7.558035373074513, "learning_rate": 9.84221422097673e-06, "loss": 17.7662, "step": 5897 }, { "epoch": 0.1078107006415998, "grad_norm": 7.404483922619043, "learning_rate": 9.842140435834245e-06, "loss": 17.7805, "step": 5898 }, { "epoch": 0.10782897983804632, "grad_norm": 7.124061337922959, "learning_rate": 9.842066633720477e-06, "loss": 17.914, "step": 5899 }, { "epoch": 0.10784725903449284, "grad_norm": 6.873473487171974, "learning_rate": 9.841992814635683e-06, "loss": 17.6703, "step": 5900 }, { "epoch": 0.10786553823093936, "grad_norm": 8.722661879244907, "learning_rate": 9.841918978580128e-06, "loss": 18.4554, "step": 5901 }, { "epoch": 0.1078838174273859, "grad_norm": 6.38762554509518, "learning_rate": 9.841845125554067e-06, "loss": 17.3544, "step": 5902 }, { "epoch": 0.10790209662383242, "grad_norm": 6.1568219449726405, "learning_rate": 9.841771255557757e-06, "loss": 17.3558, "step": 5903 }, { "epoch": 0.10792037582027894, "grad_norm": 6.749550000349546, "learning_rate": 9.841697368591458e-06, "loss": 17.4397, "step": 5904 }, { "epoch": 0.10793865501672546, "grad_norm": 5.768599118609642, "learning_rate": 9.841623464655433e-06, "loss": 17.223, "step": 5905 }, { "epoch": 0.10795693421317198, "grad_norm": 6.995410955249487, "learning_rate": 9.841549543749935e-06, "loss": 17.6919, "step": 5906 }, { "epoch": 0.10797521340961852, "grad_norm": 6.957556811186082, "learning_rate": 9.841475605875227e-06, "loss": 17.8107, "step": 5907 }, { "epoch": 0.10799349260606504, "grad_norm": 12.347647844029785, "learning_rate": 9.841401651031568e-06, "loss": 18.7845, "step": 5908 }, { "epoch": 0.10801177180251156, "grad_norm": 6.269954101478955, "learning_rate": 9.841327679219214e-06, "loss": 17.7051, "step": 5909 }, { "epoch": 0.10803005099895809, "grad_norm": 6.948777492521897, "learning_rate": 9.841253690438429e-06, "loss": 17.6624, "step": 5910 }, { "epoch": 0.10804833019540461, "grad_norm": 5.970114760504495, "learning_rate": 9.84117968468947e-06, "loss": 17.1389, "step": 5911 }, { "epoch": 0.10806660939185113, "grad_norm": 7.510658700805964, "learning_rate": 9.841105661972594e-06, "loss": 17.6788, "step": 5912 }, { "epoch": 0.10808488858829766, "grad_norm": 7.203161598491203, "learning_rate": 9.841031622288065e-06, "loss": 17.8298, "step": 5913 }, { "epoch": 0.10810316778474419, "grad_norm": 6.859963289080416, "learning_rate": 9.84095756563614e-06, "loss": 17.3716, "step": 5914 }, { "epoch": 0.10812144698119071, "grad_norm": 7.47334448271494, "learning_rate": 9.840883492017078e-06, "loss": 17.9008, "step": 5915 }, { "epoch": 0.10813972617763723, "grad_norm": 6.113853926642932, "learning_rate": 9.84080940143114e-06, "loss": 17.2308, "step": 5916 }, { "epoch": 0.10815800537408375, "grad_norm": 6.4299119208739866, "learning_rate": 9.840735293878585e-06, "loss": 17.5363, "step": 5917 }, { "epoch": 0.10817628457053027, "grad_norm": 7.4029030745756215, "learning_rate": 9.840661169359673e-06, "loss": 18.1808, "step": 5918 }, { "epoch": 0.10819456376697681, "grad_norm": 7.13773841709815, "learning_rate": 9.840587027874661e-06, "loss": 17.9393, "step": 5919 }, { "epoch": 0.10821284296342333, "grad_norm": 7.589838535219721, "learning_rate": 9.840512869423816e-06, "loss": 17.9286, "step": 5920 }, { "epoch": 0.10823112215986985, "grad_norm": 6.882416624256553, "learning_rate": 9.840438694007391e-06, "loss": 17.5512, "step": 5921 }, { "epoch": 0.10824940135631637, "grad_norm": 6.743110939806677, "learning_rate": 9.840364501625647e-06, "loss": 17.6088, "step": 5922 }, { "epoch": 0.1082676805527629, "grad_norm": 8.729816397999063, "learning_rate": 9.84029029227885e-06, "loss": 18.6794, "step": 5923 }, { "epoch": 0.10828595974920943, "grad_norm": 7.751836073429958, "learning_rate": 9.840216065967251e-06, "loss": 18.2195, "step": 5924 }, { "epoch": 0.10830423894565595, "grad_norm": 7.417194641590402, "learning_rate": 9.840141822691116e-06, "loss": 17.6567, "step": 5925 }, { "epoch": 0.10832251814210248, "grad_norm": 7.6162404810380036, "learning_rate": 9.840067562450704e-06, "loss": 17.8032, "step": 5926 }, { "epoch": 0.108340797338549, "grad_norm": 6.9424441577606695, "learning_rate": 9.839993285246276e-06, "loss": 17.9385, "step": 5927 }, { "epoch": 0.10835907653499552, "grad_norm": 7.8256812426243645, "learning_rate": 9.839918991078091e-06, "loss": 17.7594, "step": 5928 }, { "epoch": 0.10837735573144204, "grad_norm": 8.19408397305482, "learning_rate": 9.83984467994641e-06, "loss": 18.3648, "step": 5929 }, { "epoch": 0.10839563492788858, "grad_norm": 7.69688900212192, "learning_rate": 9.839770351851494e-06, "loss": 18.1308, "step": 5930 }, { "epoch": 0.1084139141243351, "grad_norm": 7.403797404073557, "learning_rate": 9.839696006793601e-06, "loss": 17.5829, "step": 5931 }, { "epoch": 0.10843219332078162, "grad_norm": 7.783961116571769, "learning_rate": 9.839621644772996e-06, "loss": 18.2717, "step": 5932 }, { "epoch": 0.10845047251722814, "grad_norm": 8.471935534814689, "learning_rate": 9.839547265789935e-06, "loss": 18.0535, "step": 5933 }, { "epoch": 0.10846875171367466, "grad_norm": 6.6672925736520625, "learning_rate": 9.839472869844683e-06, "loss": 17.2993, "step": 5934 }, { "epoch": 0.10848703091012118, "grad_norm": 8.592884386398634, "learning_rate": 9.839398456937497e-06, "loss": 18.2076, "step": 5935 }, { "epoch": 0.10850531010656772, "grad_norm": 7.319125332836831, "learning_rate": 9.839324027068638e-06, "loss": 17.8035, "step": 5936 }, { "epoch": 0.10852358930301424, "grad_norm": 7.908470337743244, "learning_rate": 9.83924958023837e-06, "loss": 18.4819, "step": 5937 }, { "epoch": 0.10854186849946076, "grad_norm": 8.936807942210304, "learning_rate": 9.839175116446953e-06, "loss": 18.6143, "step": 5938 }, { "epoch": 0.10856014769590729, "grad_norm": 6.446386405095052, "learning_rate": 9.839100635694644e-06, "loss": 17.5366, "step": 5939 }, { "epoch": 0.10857842689235381, "grad_norm": 7.69040520412441, "learning_rate": 9.83902613798171e-06, "loss": 17.7365, "step": 5940 }, { "epoch": 0.10859670608880034, "grad_norm": 6.101400336742993, "learning_rate": 9.83895162330841e-06, "loss": 17.2802, "step": 5941 }, { "epoch": 0.10861498528524687, "grad_norm": 6.626590924164906, "learning_rate": 9.838877091675001e-06, "loss": 17.5935, "step": 5942 }, { "epoch": 0.10863326448169339, "grad_norm": 9.680701863675633, "learning_rate": 9.83880254308175e-06, "loss": 18.6882, "step": 5943 }, { "epoch": 0.10865154367813991, "grad_norm": 7.19408308711397, "learning_rate": 9.838727977528917e-06, "loss": 17.7942, "step": 5944 }, { "epoch": 0.10866982287458643, "grad_norm": 6.788870980507977, "learning_rate": 9.83865339501676e-06, "loss": 17.4205, "step": 5945 }, { "epoch": 0.10868810207103295, "grad_norm": 7.200683231587349, "learning_rate": 9.838578795545544e-06, "loss": 17.8494, "step": 5946 }, { "epoch": 0.10870638126747949, "grad_norm": 6.88088555284488, "learning_rate": 9.838504179115528e-06, "loss": 17.8073, "step": 5947 }, { "epoch": 0.10872466046392601, "grad_norm": 9.24156242485691, "learning_rate": 9.838429545726977e-06, "loss": 18.2847, "step": 5948 }, { "epoch": 0.10874293966037253, "grad_norm": 7.4897225574815405, "learning_rate": 9.83835489538015e-06, "loss": 17.7234, "step": 5949 }, { "epoch": 0.10876121885681905, "grad_norm": 7.260971801654793, "learning_rate": 9.838280228075306e-06, "loss": 18.0967, "step": 5950 }, { "epoch": 0.10877949805326557, "grad_norm": 7.938202720302389, "learning_rate": 9.838205543812712e-06, "loss": 17.9975, "step": 5951 }, { "epoch": 0.1087977772497121, "grad_norm": 7.715660297079626, "learning_rate": 9.838130842592626e-06, "loss": 17.9109, "step": 5952 }, { "epoch": 0.10881605644615863, "grad_norm": 7.0959843202199675, "learning_rate": 9.838056124415312e-06, "loss": 17.5292, "step": 5953 }, { "epoch": 0.10883433564260515, "grad_norm": 7.051511554435222, "learning_rate": 9.837981389281031e-06, "loss": 17.4281, "step": 5954 }, { "epoch": 0.10885261483905168, "grad_norm": 7.186044339479653, "learning_rate": 9.837906637190046e-06, "loss": 17.6825, "step": 5955 }, { "epoch": 0.1088708940354982, "grad_norm": 6.802459216189041, "learning_rate": 9.837831868142618e-06, "loss": 17.7053, "step": 5956 }, { "epoch": 0.10888917323194472, "grad_norm": 6.841393741814351, "learning_rate": 9.837757082139007e-06, "loss": 17.5404, "step": 5957 }, { "epoch": 0.10890745242839125, "grad_norm": 6.915070035197545, "learning_rate": 9.837682279179479e-06, "loss": 17.7828, "step": 5958 }, { "epoch": 0.10892573162483778, "grad_norm": 6.676637498053301, "learning_rate": 9.837607459264294e-06, "loss": 17.5676, "step": 5959 }, { "epoch": 0.1089440108212843, "grad_norm": 8.202980699646995, "learning_rate": 9.837532622393716e-06, "loss": 17.9422, "step": 5960 }, { "epoch": 0.10896229001773082, "grad_norm": 7.886380009870115, "learning_rate": 9.837457768568004e-06, "loss": 18.4824, "step": 5961 }, { "epoch": 0.10898056921417734, "grad_norm": 7.758729587760364, "learning_rate": 9.837382897787423e-06, "loss": 17.5843, "step": 5962 }, { "epoch": 0.10899884841062386, "grad_norm": 7.420610363317163, "learning_rate": 9.837308010052236e-06, "loss": 18.0441, "step": 5963 }, { "epoch": 0.1090171276070704, "grad_norm": 8.33113732511878, "learning_rate": 9.837233105362703e-06, "loss": 18.0701, "step": 5964 }, { "epoch": 0.10903540680351692, "grad_norm": 7.841425209863305, "learning_rate": 9.837158183719086e-06, "loss": 17.9798, "step": 5965 }, { "epoch": 0.10905368599996344, "grad_norm": 8.093031035859577, "learning_rate": 9.837083245121651e-06, "loss": 18.37, "step": 5966 }, { "epoch": 0.10907196519640996, "grad_norm": 8.81414523026481, "learning_rate": 9.83700828957066e-06, "loss": 18.2651, "step": 5967 }, { "epoch": 0.10909024439285649, "grad_norm": 6.699458389845013, "learning_rate": 9.836933317066373e-06, "loss": 17.5269, "step": 5968 }, { "epoch": 0.10910852358930301, "grad_norm": 9.599192042839118, "learning_rate": 9.836858327609055e-06, "loss": 18.7339, "step": 5969 }, { "epoch": 0.10912680278574954, "grad_norm": 7.296966831416328, "learning_rate": 9.836783321198968e-06, "loss": 18.1201, "step": 5970 }, { "epoch": 0.10914508198219607, "grad_norm": 7.493497210818273, "learning_rate": 9.836708297836375e-06, "loss": 17.7271, "step": 5971 }, { "epoch": 0.10916336117864259, "grad_norm": 6.62248749502922, "learning_rate": 9.83663325752154e-06, "loss": 17.6155, "step": 5972 }, { "epoch": 0.10918164037508911, "grad_norm": 7.586502094244091, "learning_rate": 9.836558200254725e-06, "loss": 17.8567, "step": 5973 }, { "epoch": 0.10919991957153563, "grad_norm": 6.763971976362987, "learning_rate": 9.836483126036192e-06, "loss": 17.2696, "step": 5974 }, { "epoch": 0.10921819876798217, "grad_norm": 6.633354735131761, "learning_rate": 9.836408034866207e-06, "loss": 17.3721, "step": 5975 }, { "epoch": 0.10923647796442869, "grad_norm": 6.853504839258941, "learning_rate": 9.836332926745031e-06, "loss": 17.5828, "step": 5976 }, { "epoch": 0.10925475716087521, "grad_norm": 7.780416455535054, "learning_rate": 9.836257801672927e-06, "loss": 18.1243, "step": 5977 }, { "epoch": 0.10927303635732173, "grad_norm": 6.5123631034928495, "learning_rate": 9.83618265965016e-06, "loss": 17.4059, "step": 5978 }, { "epoch": 0.10929131555376825, "grad_norm": 8.787405722245287, "learning_rate": 9.836107500676992e-06, "loss": 18.1918, "step": 5979 }, { "epoch": 0.10930959475021478, "grad_norm": 6.9205916043598235, "learning_rate": 9.836032324753687e-06, "loss": 17.8655, "step": 5980 }, { "epoch": 0.10932787394666131, "grad_norm": 7.818444687855867, "learning_rate": 9.835957131880508e-06, "loss": 18.3451, "step": 5981 }, { "epoch": 0.10934615314310783, "grad_norm": 7.955793940216197, "learning_rate": 9.83588192205772e-06, "loss": 18.0892, "step": 5982 }, { "epoch": 0.10936443233955435, "grad_norm": 6.522733972424496, "learning_rate": 9.835806695285583e-06, "loss": 17.4412, "step": 5983 }, { "epoch": 0.10938271153600088, "grad_norm": 5.546367331964139, "learning_rate": 9.835731451564365e-06, "loss": 17.0042, "step": 5984 }, { "epoch": 0.1094009907324474, "grad_norm": 8.254720366045445, "learning_rate": 9.835656190894329e-06, "loss": 18.5262, "step": 5985 }, { "epoch": 0.10941926992889392, "grad_norm": 8.905036048111233, "learning_rate": 9.835580913275736e-06, "loss": 18.3572, "step": 5986 }, { "epoch": 0.10943754912534046, "grad_norm": 5.716347342494404, "learning_rate": 9.835505618708851e-06, "loss": 17.3397, "step": 5987 }, { "epoch": 0.10945582832178698, "grad_norm": 7.297773266161931, "learning_rate": 9.83543030719394e-06, "loss": 17.7705, "step": 5988 }, { "epoch": 0.1094741075182335, "grad_norm": 7.542163751455365, "learning_rate": 9.835354978731265e-06, "loss": 17.8389, "step": 5989 }, { "epoch": 0.10949238671468002, "grad_norm": 7.400703798602654, "learning_rate": 9.835279633321091e-06, "loss": 17.7062, "step": 5990 }, { "epoch": 0.10951066591112654, "grad_norm": 6.609218880417126, "learning_rate": 9.83520427096368e-06, "loss": 17.4464, "step": 5991 }, { "epoch": 0.10952894510757308, "grad_norm": 8.27129966257969, "learning_rate": 9.835128891659298e-06, "loss": 18.5295, "step": 5992 }, { "epoch": 0.1095472243040196, "grad_norm": 7.842780011688152, "learning_rate": 9.835053495408209e-06, "loss": 17.9532, "step": 5993 }, { "epoch": 0.10956550350046612, "grad_norm": 7.941038020737538, "learning_rate": 9.834978082210678e-06, "loss": 18.1383, "step": 5994 }, { "epoch": 0.10958378269691264, "grad_norm": 7.385043901041865, "learning_rate": 9.834902652066966e-06, "loss": 18.058, "step": 5995 }, { "epoch": 0.10960206189335917, "grad_norm": 6.432381254149862, "learning_rate": 9.834827204977342e-06, "loss": 17.1324, "step": 5996 }, { "epoch": 0.10962034108980569, "grad_norm": 7.004934340731052, "learning_rate": 9.834751740942068e-06, "loss": 17.6494, "step": 5997 }, { "epoch": 0.10963862028625222, "grad_norm": 6.879043813875452, "learning_rate": 9.834676259961407e-06, "loss": 17.6899, "step": 5998 }, { "epoch": 0.10965689948269874, "grad_norm": 8.461280155671439, "learning_rate": 9.834600762035626e-06, "loss": 18.5268, "step": 5999 }, { "epoch": 0.10967517867914527, "grad_norm": 7.585188465613532, "learning_rate": 9.83452524716499e-06, "loss": 17.8837, "step": 6000 }, { "epoch": 0.10969345787559179, "grad_norm": 7.404804223634506, "learning_rate": 9.83444971534976e-06, "loss": 18.0314, "step": 6001 }, { "epoch": 0.10971173707203831, "grad_norm": 8.341881637201688, "learning_rate": 9.834374166590206e-06, "loss": 17.9829, "step": 6002 }, { "epoch": 0.10973001626848483, "grad_norm": 6.846917649382119, "learning_rate": 9.834298600886589e-06, "loss": 17.9417, "step": 6003 }, { "epoch": 0.10974829546493137, "grad_norm": 6.829582477175553, "learning_rate": 9.834223018239175e-06, "loss": 17.6357, "step": 6004 }, { "epoch": 0.10976657466137789, "grad_norm": 6.599843832524429, "learning_rate": 9.83414741864823e-06, "loss": 17.4774, "step": 6005 }, { "epoch": 0.10978485385782441, "grad_norm": 6.424816102653562, "learning_rate": 9.834071802114016e-06, "loss": 17.4823, "step": 6006 }, { "epoch": 0.10980313305427093, "grad_norm": 6.48361962247988, "learning_rate": 9.833996168636801e-06, "loss": 17.4452, "step": 6007 }, { "epoch": 0.10982141225071745, "grad_norm": 6.130064538530384, "learning_rate": 9.833920518216848e-06, "loss": 17.2327, "step": 6008 }, { "epoch": 0.10983969144716399, "grad_norm": 7.414484958983045, "learning_rate": 9.833844850854422e-06, "loss": 18.0387, "step": 6009 }, { "epoch": 0.10985797064361051, "grad_norm": 6.878425341927839, "learning_rate": 9.833769166549792e-06, "loss": 17.406, "step": 6010 }, { "epoch": 0.10987624984005703, "grad_norm": 7.473652822725783, "learning_rate": 9.833693465303217e-06, "loss": 17.9417, "step": 6011 }, { "epoch": 0.10989452903650355, "grad_norm": 5.7167561414418095, "learning_rate": 9.833617747114969e-06, "loss": 17.4244, "step": 6012 }, { "epoch": 0.10991280823295008, "grad_norm": 7.021581532495483, "learning_rate": 9.83354201198531e-06, "loss": 17.5103, "step": 6013 }, { "epoch": 0.1099310874293966, "grad_norm": 6.459942931667375, "learning_rate": 9.833466259914503e-06, "loss": 17.5817, "step": 6014 }, { "epoch": 0.10994936662584313, "grad_norm": 7.1900032351537515, "learning_rate": 9.833390490902819e-06, "loss": 17.847, "step": 6015 }, { "epoch": 0.10996764582228966, "grad_norm": 7.904453231222815, "learning_rate": 9.83331470495052e-06, "loss": 17.956, "step": 6016 }, { "epoch": 0.10998592501873618, "grad_norm": 7.427178197504679, "learning_rate": 9.833238902057873e-06, "loss": 17.8389, "step": 6017 }, { "epoch": 0.1100042042151827, "grad_norm": 6.208686767567184, "learning_rate": 9.83316308222514e-06, "loss": 17.3005, "step": 6018 }, { "epoch": 0.11002248341162922, "grad_norm": 7.847786718059512, "learning_rate": 9.833087245452594e-06, "loss": 17.9887, "step": 6019 }, { "epoch": 0.11004076260807574, "grad_norm": 6.654436993277937, "learning_rate": 9.833011391740494e-06, "loss": 17.688, "step": 6020 }, { "epoch": 0.11005904180452228, "grad_norm": 7.973594040381017, "learning_rate": 9.832935521089109e-06, "loss": 18.3191, "step": 6021 }, { "epoch": 0.1100773210009688, "grad_norm": 8.951738839041905, "learning_rate": 9.832859633498704e-06, "loss": 18.1994, "step": 6022 }, { "epoch": 0.11009560019741532, "grad_norm": 5.833255689683318, "learning_rate": 9.832783728969546e-06, "loss": 17.0594, "step": 6023 }, { "epoch": 0.11011387939386184, "grad_norm": 6.54454264521597, "learning_rate": 9.832707807501902e-06, "loss": 17.3724, "step": 6024 }, { "epoch": 0.11013215859030837, "grad_norm": 7.690288277658039, "learning_rate": 9.832631869096034e-06, "loss": 18.2661, "step": 6025 }, { "epoch": 0.1101504377867549, "grad_norm": 7.299021017295833, "learning_rate": 9.832555913752211e-06, "loss": 17.555, "step": 6026 }, { "epoch": 0.11016871698320142, "grad_norm": 6.828135335092559, "learning_rate": 9.832479941470699e-06, "loss": 17.5603, "step": 6027 }, { "epoch": 0.11018699617964794, "grad_norm": 7.864703887956888, "learning_rate": 9.832403952251765e-06, "loss": 18.39, "step": 6028 }, { "epoch": 0.11020527537609447, "grad_norm": 6.4129004093808515, "learning_rate": 9.832327946095674e-06, "loss": 17.4481, "step": 6029 }, { "epoch": 0.11022355457254099, "grad_norm": 10.061475671645136, "learning_rate": 9.832251923002692e-06, "loss": 19.0137, "step": 6030 }, { "epoch": 0.11024183376898751, "grad_norm": 6.325548363634723, "learning_rate": 9.832175882973088e-06, "loss": 17.4535, "step": 6031 }, { "epoch": 0.11026011296543405, "grad_norm": 7.493245225301195, "learning_rate": 9.832099826007126e-06, "loss": 17.9325, "step": 6032 }, { "epoch": 0.11027839216188057, "grad_norm": 7.8488940376631255, "learning_rate": 9.832023752105073e-06, "loss": 17.7612, "step": 6033 }, { "epoch": 0.11029667135832709, "grad_norm": 7.88173752708001, "learning_rate": 9.831947661267196e-06, "loss": 18.057, "step": 6034 }, { "epoch": 0.11031495055477361, "grad_norm": 6.316696587296195, "learning_rate": 9.831871553493763e-06, "loss": 17.3385, "step": 6035 }, { "epoch": 0.11033322975122013, "grad_norm": 6.573038168903045, "learning_rate": 9.831795428785038e-06, "loss": 17.3735, "step": 6036 }, { "epoch": 0.11035150894766665, "grad_norm": 7.502583056333343, "learning_rate": 9.83171928714129e-06, "loss": 18.2043, "step": 6037 }, { "epoch": 0.11036978814411319, "grad_norm": 6.639241512757685, "learning_rate": 9.831643128562786e-06, "loss": 17.56, "step": 6038 }, { "epoch": 0.11038806734055971, "grad_norm": 6.481501088163189, "learning_rate": 9.831566953049791e-06, "loss": 17.4618, "step": 6039 }, { "epoch": 0.11040634653700623, "grad_norm": 7.354046600163907, "learning_rate": 9.831490760602573e-06, "loss": 18.3112, "step": 6040 }, { "epoch": 0.11042462573345276, "grad_norm": 7.255419059578754, "learning_rate": 9.8314145512214e-06, "loss": 17.9431, "step": 6041 }, { "epoch": 0.11044290492989928, "grad_norm": 6.92008124755534, "learning_rate": 9.831338324906537e-06, "loss": 17.8271, "step": 6042 }, { "epoch": 0.11046118412634581, "grad_norm": 6.3004293240695235, "learning_rate": 9.831262081658253e-06, "loss": 17.1417, "step": 6043 }, { "epoch": 0.11047946332279233, "grad_norm": 6.620006993373256, "learning_rate": 9.831185821476815e-06, "loss": 17.4947, "step": 6044 }, { "epoch": 0.11049774251923886, "grad_norm": 6.307453629542876, "learning_rate": 9.831109544362489e-06, "loss": 17.689, "step": 6045 }, { "epoch": 0.11051602171568538, "grad_norm": 7.194822447342063, "learning_rate": 9.831033250315544e-06, "loss": 17.7781, "step": 6046 }, { "epoch": 0.1105343009121319, "grad_norm": 8.625339065623256, "learning_rate": 9.830956939336248e-06, "loss": 18.3123, "step": 6047 }, { "epoch": 0.11055258010857842, "grad_norm": 6.474551579719272, "learning_rate": 9.830880611424866e-06, "loss": 17.5376, "step": 6048 }, { "epoch": 0.11057085930502496, "grad_norm": 7.036493049200884, "learning_rate": 9.830804266581667e-06, "loss": 17.7203, "step": 6049 }, { "epoch": 0.11058913850147148, "grad_norm": 8.329100061696156, "learning_rate": 9.830727904806918e-06, "loss": 18.3742, "step": 6050 }, { "epoch": 0.110607417697918, "grad_norm": 7.052059043051446, "learning_rate": 9.830651526100884e-06, "loss": 17.7059, "step": 6051 }, { "epoch": 0.11062569689436452, "grad_norm": 6.58886304139191, "learning_rate": 9.83057513046384e-06, "loss": 17.5383, "step": 6052 }, { "epoch": 0.11064397609081104, "grad_norm": 6.97142208965401, "learning_rate": 9.830498717896047e-06, "loss": 17.6627, "step": 6053 }, { "epoch": 0.11066225528725757, "grad_norm": 6.601100442211125, "learning_rate": 9.830422288397776e-06, "loss": 17.5335, "step": 6054 }, { "epoch": 0.1106805344837041, "grad_norm": 6.573928381752774, "learning_rate": 9.830345841969294e-06, "loss": 17.7606, "step": 6055 }, { "epoch": 0.11069881368015062, "grad_norm": 6.799830932989246, "learning_rate": 9.830269378610868e-06, "loss": 17.5729, "step": 6056 }, { "epoch": 0.11071709287659715, "grad_norm": 7.876836877064901, "learning_rate": 9.830192898322768e-06, "loss": 17.851, "step": 6057 }, { "epoch": 0.11073537207304367, "grad_norm": 7.555648849237257, "learning_rate": 9.83011640110526e-06, "loss": 17.9312, "step": 6058 }, { "epoch": 0.11075365126949019, "grad_norm": 7.203739739028518, "learning_rate": 9.830039886958615e-06, "loss": 17.7517, "step": 6059 }, { "epoch": 0.11077193046593672, "grad_norm": 7.177766089570443, "learning_rate": 9.829963355883098e-06, "loss": 17.9962, "step": 6060 }, { "epoch": 0.11079020966238325, "grad_norm": 6.5571110304539, "learning_rate": 9.829886807878979e-06, "loss": 17.6729, "step": 6061 }, { "epoch": 0.11080848885882977, "grad_norm": 6.139209121599606, "learning_rate": 9.829810242946525e-06, "loss": 17.3619, "step": 6062 }, { "epoch": 0.11082676805527629, "grad_norm": 7.792781352189638, "learning_rate": 9.829733661086005e-06, "loss": 17.941, "step": 6063 }, { "epoch": 0.11084504725172281, "grad_norm": 7.615161081426064, "learning_rate": 9.82965706229769e-06, "loss": 17.8296, "step": 6064 }, { "epoch": 0.11086332644816933, "grad_norm": 8.869587136548185, "learning_rate": 9.829580446581843e-06, "loss": 18.6859, "step": 6065 }, { "epoch": 0.11088160564461587, "grad_norm": 7.0451364874647595, "learning_rate": 9.82950381393874e-06, "loss": 17.4014, "step": 6066 }, { "epoch": 0.11089988484106239, "grad_norm": 6.641124738432329, "learning_rate": 9.82942716436864e-06, "loss": 17.6367, "step": 6067 }, { "epoch": 0.11091816403750891, "grad_norm": 6.614254508646868, "learning_rate": 9.82935049787182e-06, "loss": 17.6394, "step": 6068 }, { "epoch": 0.11093644323395543, "grad_norm": 6.62975390893817, "learning_rate": 9.829273814448546e-06, "loss": 17.7465, "step": 6069 }, { "epoch": 0.11095472243040196, "grad_norm": 7.512657342412706, "learning_rate": 9.829197114099084e-06, "loss": 17.6712, "step": 6070 }, { "epoch": 0.11097300162684848, "grad_norm": 6.07668994729175, "learning_rate": 9.829120396823706e-06, "loss": 17.2348, "step": 6071 }, { "epoch": 0.11099128082329501, "grad_norm": 8.122956038676117, "learning_rate": 9.829043662622681e-06, "loss": 18.3812, "step": 6072 }, { "epoch": 0.11100956001974153, "grad_norm": 7.771634793500091, "learning_rate": 9.828966911496277e-06, "loss": 17.8931, "step": 6073 }, { "epoch": 0.11102783921618806, "grad_norm": 7.414760524464213, "learning_rate": 9.828890143444763e-06, "loss": 18.34, "step": 6074 }, { "epoch": 0.11104611841263458, "grad_norm": 6.855095671572821, "learning_rate": 9.828813358468406e-06, "loss": 17.9983, "step": 6075 }, { "epoch": 0.1110643976090811, "grad_norm": 6.077699519279136, "learning_rate": 9.82873655656748e-06, "loss": 17.3106, "step": 6076 }, { "epoch": 0.11108267680552764, "grad_norm": 7.0648388877619075, "learning_rate": 9.82865973774225e-06, "loss": 17.9716, "step": 6077 }, { "epoch": 0.11110095600197416, "grad_norm": 6.766820816789677, "learning_rate": 9.828582901992987e-06, "loss": 17.4199, "step": 6078 }, { "epoch": 0.11111923519842068, "grad_norm": 7.267164185676141, "learning_rate": 9.82850604931996e-06, "loss": 17.7048, "step": 6079 }, { "epoch": 0.1111375143948672, "grad_norm": 6.3016461713514484, "learning_rate": 9.828429179723437e-06, "loss": 17.3628, "step": 6080 }, { "epoch": 0.11115579359131372, "grad_norm": 8.301755776026813, "learning_rate": 9.82835229320369e-06, "loss": 18.1444, "step": 6081 }, { "epoch": 0.11117407278776024, "grad_norm": 7.392556694316968, "learning_rate": 9.828275389760986e-06, "loss": 17.7959, "step": 6082 }, { "epoch": 0.11119235198420678, "grad_norm": 6.276445797133483, "learning_rate": 9.828198469395598e-06, "loss": 17.5543, "step": 6083 }, { "epoch": 0.1112106311806533, "grad_norm": 7.318058488520969, "learning_rate": 9.82812153210779e-06, "loss": 17.8725, "step": 6084 }, { "epoch": 0.11122891037709982, "grad_norm": 7.157296095234143, "learning_rate": 9.828044577897837e-06, "loss": 18.1602, "step": 6085 }, { "epoch": 0.11124718957354635, "grad_norm": 7.323118370588629, "learning_rate": 9.827967606766009e-06, "loss": 17.5684, "step": 6086 }, { "epoch": 0.11126546876999287, "grad_norm": 6.531660504221052, "learning_rate": 9.82789061871257e-06, "loss": 17.4564, "step": 6087 }, { "epoch": 0.11128374796643939, "grad_norm": 6.230555906074557, "learning_rate": 9.827813613737795e-06, "loss": 17.376, "step": 6088 }, { "epoch": 0.11130202716288592, "grad_norm": 6.835899987099556, "learning_rate": 9.82773659184195e-06, "loss": 18.0869, "step": 6089 }, { "epoch": 0.11132030635933245, "grad_norm": 6.9819025137206285, "learning_rate": 9.827659553025311e-06, "loss": 17.5553, "step": 6090 }, { "epoch": 0.11133858555577897, "grad_norm": 6.9195461465580825, "learning_rate": 9.827582497288142e-06, "loss": 17.494, "step": 6091 }, { "epoch": 0.11135686475222549, "grad_norm": 7.16870145138108, "learning_rate": 9.827505424630714e-06, "loss": 18.086, "step": 6092 }, { "epoch": 0.11137514394867201, "grad_norm": 7.75470910521547, "learning_rate": 9.827428335053301e-06, "loss": 17.9726, "step": 6093 }, { "epoch": 0.11139342314511855, "grad_norm": 6.356708930907695, "learning_rate": 9.827351228556168e-06, "loss": 17.4048, "step": 6094 }, { "epoch": 0.11141170234156507, "grad_norm": 8.195102079708166, "learning_rate": 9.82727410513959e-06, "loss": 17.998, "step": 6095 }, { "epoch": 0.11142998153801159, "grad_norm": 6.096094184633512, "learning_rate": 9.827196964803833e-06, "loss": 17.219, "step": 6096 }, { "epoch": 0.11144826073445811, "grad_norm": 6.789787708293502, "learning_rate": 9.827119807549172e-06, "loss": 17.4854, "step": 6097 }, { "epoch": 0.11146653993090463, "grad_norm": 7.9023625358560015, "learning_rate": 9.827042633375873e-06, "loss": 17.9394, "step": 6098 }, { "epoch": 0.11148481912735116, "grad_norm": 6.9377014505029715, "learning_rate": 9.826965442284212e-06, "loss": 17.1389, "step": 6099 }, { "epoch": 0.11150309832379769, "grad_norm": 8.037621294410371, "learning_rate": 9.826888234274452e-06, "loss": 18.2612, "step": 6100 }, { "epoch": 0.11152137752024421, "grad_norm": 7.304666527307093, "learning_rate": 9.826811009346869e-06, "loss": 17.7392, "step": 6101 }, { "epoch": 0.11153965671669074, "grad_norm": 6.3745404473633425, "learning_rate": 9.826733767501731e-06, "loss": 17.3288, "step": 6102 }, { "epoch": 0.11155793591313726, "grad_norm": 10.343701118274206, "learning_rate": 9.826656508739311e-06, "loss": 18.9521, "step": 6103 }, { "epoch": 0.11157621510958378, "grad_norm": 8.011024726129985, "learning_rate": 9.826579233059878e-06, "loss": 18.0134, "step": 6104 }, { "epoch": 0.1115944943060303, "grad_norm": 6.7895829546955175, "learning_rate": 9.826501940463706e-06, "loss": 17.5846, "step": 6105 }, { "epoch": 0.11161277350247684, "grad_norm": 7.49594915628895, "learning_rate": 9.826424630951062e-06, "loss": 17.7317, "step": 6106 }, { "epoch": 0.11163105269892336, "grad_norm": 7.049010994020828, "learning_rate": 9.826347304522219e-06, "loss": 17.5041, "step": 6107 }, { "epoch": 0.11164933189536988, "grad_norm": 6.625397145218589, "learning_rate": 9.826269961177447e-06, "loss": 17.3501, "step": 6108 }, { "epoch": 0.1116676110918164, "grad_norm": 6.79675307076588, "learning_rate": 9.826192600917017e-06, "loss": 17.8181, "step": 6109 }, { "epoch": 0.11168589028826292, "grad_norm": 6.889365418259622, "learning_rate": 9.826115223741202e-06, "loss": 17.5375, "step": 6110 }, { "epoch": 0.11170416948470946, "grad_norm": 7.6520421109127295, "learning_rate": 9.826037829650271e-06, "loss": 18.1953, "step": 6111 }, { "epoch": 0.11172244868115598, "grad_norm": 6.8452885499139215, "learning_rate": 9.825960418644495e-06, "loss": 17.5787, "step": 6112 }, { "epoch": 0.1117407278776025, "grad_norm": 6.151882389545902, "learning_rate": 9.82588299072415e-06, "loss": 17.4467, "step": 6113 }, { "epoch": 0.11175900707404902, "grad_norm": 6.83752001813512, "learning_rate": 9.8258055458895e-06, "loss": 17.3972, "step": 6114 }, { "epoch": 0.11177728627049555, "grad_norm": 7.396162960704652, "learning_rate": 9.825728084140824e-06, "loss": 17.8534, "step": 6115 }, { "epoch": 0.11179556546694207, "grad_norm": 8.660874668974742, "learning_rate": 9.825650605478386e-06, "loss": 18.5825, "step": 6116 }, { "epoch": 0.1118138446633886, "grad_norm": 7.5702717335581715, "learning_rate": 9.825573109902465e-06, "loss": 18.273, "step": 6117 }, { "epoch": 0.11183212385983513, "grad_norm": 7.7147435530331245, "learning_rate": 9.825495597413328e-06, "loss": 18.2381, "step": 6118 }, { "epoch": 0.11185040305628165, "grad_norm": 7.152143115685819, "learning_rate": 9.825418068011245e-06, "loss": 17.8437, "step": 6119 }, { "epoch": 0.11186868225272817, "grad_norm": 6.567868181370384, "learning_rate": 9.825340521696493e-06, "loss": 17.3631, "step": 6120 }, { "epoch": 0.11188696144917469, "grad_norm": 7.48768227300808, "learning_rate": 9.825262958469342e-06, "loss": 17.8795, "step": 6121 }, { "epoch": 0.11190524064562121, "grad_norm": 5.681827530665296, "learning_rate": 9.82518537833006e-06, "loss": 16.9753, "step": 6122 }, { "epoch": 0.11192351984206775, "grad_norm": 7.042510126919909, "learning_rate": 9.825107781278924e-06, "loss": 17.9713, "step": 6123 }, { "epoch": 0.11194179903851427, "grad_norm": 6.066116133662346, "learning_rate": 9.825030167316204e-06, "loss": 17.4079, "step": 6124 }, { "epoch": 0.11196007823496079, "grad_norm": 8.930087487018563, "learning_rate": 9.824952536442171e-06, "loss": 18.8651, "step": 6125 }, { "epoch": 0.11197835743140731, "grad_norm": 8.021392553948088, "learning_rate": 9.824874888657099e-06, "loss": 18.0091, "step": 6126 }, { "epoch": 0.11199663662785383, "grad_norm": 6.0462716777884795, "learning_rate": 9.824797223961259e-06, "loss": 17.4654, "step": 6127 }, { "epoch": 0.11201491582430037, "grad_norm": 7.870660144820459, "learning_rate": 9.824719542354923e-06, "loss": 18.1762, "step": 6128 }, { "epoch": 0.11203319502074689, "grad_norm": 7.304537260867494, "learning_rate": 9.824641843838364e-06, "loss": 17.9305, "step": 6129 }, { "epoch": 0.11205147421719341, "grad_norm": 6.882291977888932, "learning_rate": 9.824564128411854e-06, "loss": 17.6084, "step": 6130 }, { "epoch": 0.11206975341363994, "grad_norm": 8.883903749463636, "learning_rate": 9.824486396075665e-06, "loss": 18.6727, "step": 6131 }, { "epoch": 0.11208803261008646, "grad_norm": 7.763208221495181, "learning_rate": 9.82440864683007e-06, "loss": 18.1982, "step": 6132 }, { "epoch": 0.11210631180653298, "grad_norm": 7.494317510006399, "learning_rate": 9.824330880675341e-06, "loss": 17.5689, "step": 6133 }, { "epoch": 0.11212459100297952, "grad_norm": 6.734347902921629, "learning_rate": 9.824253097611751e-06, "loss": 17.6065, "step": 6134 }, { "epoch": 0.11214287019942604, "grad_norm": 6.903138003440501, "learning_rate": 9.824175297639573e-06, "loss": 17.7784, "step": 6135 }, { "epoch": 0.11216114939587256, "grad_norm": 8.975992327832438, "learning_rate": 9.82409748075908e-06, "loss": 18.3752, "step": 6136 }, { "epoch": 0.11217942859231908, "grad_norm": 6.789633529830011, "learning_rate": 9.824019646970543e-06, "loss": 17.6402, "step": 6137 }, { "epoch": 0.1121977077887656, "grad_norm": 6.409081435221622, "learning_rate": 9.823941796274235e-06, "loss": 17.5977, "step": 6138 }, { "epoch": 0.11221598698521212, "grad_norm": 6.116320825092029, "learning_rate": 9.823863928670431e-06, "loss": 17.6209, "step": 6139 }, { "epoch": 0.11223426618165866, "grad_norm": 7.867902646461152, "learning_rate": 9.823786044159403e-06, "loss": 18.1709, "step": 6140 }, { "epoch": 0.11225254537810518, "grad_norm": 5.947024102002984, "learning_rate": 9.823708142741422e-06, "loss": 17.2042, "step": 6141 }, { "epoch": 0.1122708245745517, "grad_norm": 6.851079550186919, "learning_rate": 9.823630224416762e-06, "loss": 17.4814, "step": 6142 }, { "epoch": 0.11228910377099822, "grad_norm": 7.498051350606899, "learning_rate": 9.823552289185699e-06, "loss": 18.1426, "step": 6143 }, { "epoch": 0.11230738296744475, "grad_norm": 6.564015601323493, "learning_rate": 9.823474337048502e-06, "loss": 17.9036, "step": 6144 }, { "epoch": 0.11232566216389128, "grad_norm": 7.85380257407971, "learning_rate": 9.823396368005445e-06, "loss": 17.9739, "step": 6145 }, { "epoch": 0.1123439413603378, "grad_norm": 7.5435428933914475, "learning_rate": 9.823318382056803e-06, "loss": 17.4072, "step": 6146 }, { "epoch": 0.11236222055678433, "grad_norm": 7.9263872401537645, "learning_rate": 9.823240379202851e-06, "loss": 18.1183, "step": 6147 }, { "epoch": 0.11238049975323085, "grad_norm": 6.175677186935788, "learning_rate": 9.823162359443858e-06, "loss": 17.1602, "step": 6148 }, { "epoch": 0.11239877894967737, "grad_norm": 8.336031690932213, "learning_rate": 9.8230843227801e-06, "loss": 18.207, "step": 6149 }, { "epoch": 0.11241705814612389, "grad_norm": 7.549068160390573, "learning_rate": 9.82300626921185e-06, "loss": 17.9894, "step": 6150 }, { "epoch": 0.11243533734257043, "grad_norm": 5.67824785647607, "learning_rate": 9.822928198739381e-06, "loss": 17.1215, "step": 6151 }, { "epoch": 0.11245361653901695, "grad_norm": 6.724954031901365, "learning_rate": 9.822850111362968e-06, "loss": 17.609, "step": 6152 }, { "epoch": 0.11247189573546347, "grad_norm": 5.731028461003114, "learning_rate": 9.822772007082883e-06, "loss": 17.1988, "step": 6153 }, { "epoch": 0.11249017493190999, "grad_norm": 7.958600282388625, "learning_rate": 9.8226938858994e-06, "loss": 18.0315, "step": 6154 }, { "epoch": 0.11250845412835651, "grad_norm": 5.418456092849641, "learning_rate": 9.822615747812794e-06, "loss": 17.0944, "step": 6155 }, { "epoch": 0.11252673332480304, "grad_norm": 6.250556961926755, "learning_rate": 9.82253759282334e-06, "loss": 17.3175, "step": 6156 }, { "epoch": 0.11254501252124957, "grad_norm": 7.435879048175508, "learning_rate": 9.822459420931308e-06, "loss": 17.8584, "step": 6157 }, { "epoch": 0.11256329171769609, "grad_norm": 7.8249285387608305, "learning_rate": 9.822381232136974e-06, "loss": 17.9644, "step": 6158 }, { "epoch": 0.11258157091414261, "grad_norm": 6.8331697717654984, "learning_rate": 9.822303026440614e-06, "loss": 17.6083, "step": 6159 }, { "epoch": 0.11259985011058914, "grad_norm": 6.290695489058567, "learning_rate": 9.822224803842501e-06, "loss": 17.3292, "step": 6160 }, { "epoch": 0.11261812930703566, "grad_norm": 6.905225875045079, "learning_rate": 9.822146564342907e-06, "loss": 17.5662, "step": 6161 }, { "epoch": 0.1126364085034822, "grad_norm": 7.702543026983446, "learning_rate": 9.822068307942107e-06, "loss": 18.0518, "step": 6162 }, { "epoch": 0.11265468769992872, "grad_norm": 9.555595004282456, "learning_rate": 9.821990034640377e-06, "loss": 18.3942, "step": 6163 }, { "epoch": 0.11267296689637524, "grad_norm": 7.863144492117609, "learning_rate": 9.82191174443799e-06, "loss": 18.304, "step": 6164 }, { "epoch": 0.11269124609282176, "grad_norm": 6.460215556460458, "learning_rate": 9.821833437335222e-06, "loss": 17.5291, "step": 6165 }, { "epoch": 0.11270952528926828, "grad_norm": 8.68694114150609, "learning_rate": 9.821755113332346e-06, "loss": 18.0716, "step": 6166 }, { "epoch": 0.1127278044857148, "grad_norm": 6.3340952582156484, "learning_rate": 9.821676772429635e-06, "loss": 17.5139, "step": 6167 }, { "epoch": 0.11274608368216134, "grad_norm": 8.46967776440184, "learning_rate": 9.821598414627366e-06, "loss": 18.1872, "step": 6168 }, { "epoch": 0.11276436287860786, "grad_norm": 6.4976312789064865, "learning_rate": 9.821520039925813e-06, "loss": 17.483, "step": 6169 }, { "epoch": 0.11278264207505438, "grad_norm": 7.102957846646031, "learning_rate": 9.821441648325251e-06, "loss": 17.8098, "step": 6170 }, { "epoch": 0.1128009212715009, "grad_norm": 8.305200924217363, "learning_rate": 9.821363239825955e-06, "loss": 18.351, "step": 6171 }, { "epoch": 0.11281920046794743, "grad_norm": 6.250879529314112, "learning_rate": 9.821284814428198e-06, "loss": 17.3108, "step": 6172 }, { "epoch": 0.11283747966439395, "grad_norm": 7.34027541996215, "learning_rate": 9.821206372132256e-06, "loss": 17.4873, "step": 6173 }, { "epoch": 0.11285575886084048, "grad_norm": 6.9034516263348475, "learning_rate": 9.821127912938406e-06, "loss": 17.8491, "step": 6174 }, { "epoch": 0.112874038057287, "grad_norm": 6.760558702840803, "learning_rate": 9.82104943684692e-06, "loss": 17.6755, "step": 6175 }, { "epoch": 0.11289231725373353, "grad_norm": 7.475051783595172, "learning_rate": 9.820970943858074e-06, "loss": 17.6946, "step": 6176 }, { "epoch": 0.11291059645018005, "grad_norm": 8.605565217503512, "learning_rate": 9.820892433972143e-06, "loss": 18.4892, "step": 6177 }, { "epoch": 0.11292887564662657, "grad_norm": 7.463849238422301, "learning_rate": 9.820813907189401e-06, "loss": 17.7594, "step": 6178 }, { "epoch": 0.1129471548430731, "grad_norm": 6.493951292443605, "learning_rate": 9.820735363510128e-06, "loss": 17.5372, "step": 6179 }, { "epoch": 0.11296543403951963, "grad_norm": 6.778429363469027, "learning_rate": 9.820656802934593e-06, "loss": 17.8115, "step": 6180 }, { "epoch": 0.11298371323596615, "grad_norm": 5.498516120360731, "learning_rate": 9.820578225463076e-06, "loss": 17.1009, "step": 6181 }, { "epoch": 0.11300199243241267, "grad_norm": 6.7522866139760644, "learning_rate": 9.820499631095847e-06, "loss": 17.7332, "step": 6182 }, { "epoch": 0.11302027162885919, "grad_norm": 7.128485909347858, "learning_rate": 9.82042101983319e-06, "loss": 17.7156, "step": 6183 }, { "epoch": 0.11303855082530571, "grad_norm": 6.56856444299014, "learning_rate": 9.820342391675373e-06, "loss": 17.7751, "step": 6184 }, { "epoch": 0.11305683002175225, "grad_norm": 6.936302922317391, "learning_rate": 9.820263746622674e-06, "loss": 17.6575, "step": 6185 }, { "epoch": 0.11307510921819877, "grad_norm": 7.840124363272216, "learning_rate": 9.82018508467537e-06, "loss": 18.32, "step": 6186 }, { "epoch": 0.1130933884146453, "grad_norm": 6.888341902062848, "learning_rate": 9.820106405833735e-06, "loss": 17.6309, "step": 6187 }, { "epoch": 0.11311166761109182, "grad_norm": 7.271992003510105, "learning_rate": 9.820027710098045e-06, "loss": 17.9614, "step": 6188 }, { "epoch": 0.11312994680753834, "grad_norm": 8.010739408017542, "learning_rate": 9.819948997468577e-06, "loss": 18.4273, "step": 6189 }, { "epoch": 0.11314822600398486, "grad_norm": 8.042282447923073, "learning_rate": 9.819870267945605e-06, "loss": 18.3378, "step": 6190 }, { "epoch": 0.1131665052004314, "grad_norm": 7.8011236944362325, "learning_rate": 9.819791521529408e-06, "loss": 18.1001, "step": 6191 }, { "epoch": 0.11318478439687792, "grad_norm": 7.728160979327711, "learning_rate": 9.819712758220257e-06, "loss": 17.9143, "step": 6192 }, { "epoch": 0.11320306359332444, "grad_norm": 6.325758801504882, "learning_rate": 9.819633978018432e-06, "loss": 17.5259, "step": 6193 }, { "epoch": 0.11322134278977096, "grad_norm": 5.32983254100174, "learning_rate": 9.81955518092421e-06, "loss": 17.1029, "step": 6194 }, { "epoch": 0.11323962198621748, "grad_norm": 6.033918154854671, "learning_rate": 9.819476366937863e-06, "loss": 17.5463, "step": 6195 }, { "epoch": 0.11325790118266402, "grad_norm": 8.331254585623563, "learning_rate": 9.81939753605967e-06, "loss": 18.2479, "step": 6196 }, { "epoch": 0.11327618037911054, "grad_norm": 8.115214118595839, "learning_rate": 9.819318688289907e-06, "loss": 18.2687, "step": 6197 }, { "epoch": 0.11329445957555706, "grad_norm": 7.051646875887396, "learning_rate": 9.819239823628852e-06, "loss": 17.8834, "step": 6198 }, { "epoch": 0.11331273877200358, "grad_norm": 9.367605477448407, "learning_rate": 9.819160942076778e-06, "loss": 18.6733, "step": 6199 }, { "epoch": 0.1133310179684501, "grad_norm": 9.731200169257715, "learning_rate": 9.819082043633963e-06, "loss": 18.1608, "step": 6200 }, { "epoch": 0.11334929716489663, "grad_norm": 7.864027832001035, "learning_rate": 9.819003128300684e-06, "loss": 18.2864, "step": 6201 }, { "epoch": 0.11336757636134316, "grad_norm": 7.5567191452454345, "learning_rate": 9.818924196077215e-06, "loss": 18.0862, "step": 6202 }, { "epoch": 0.11338585555778968, "grad_norm": 6.385528665166163, "learning_rate": 9.818845246963838e-06, "loss": 17.6081, "step": 6203 }, { "epoch": 0.1134041347542362, "grad_norm": 8.142955437200175, "learning_rate": 9.818766280960825e-06, "loss": 17.981, "step": 6204 }, { "epoch": 0.11342241395068273, "grad_norm": 6.624707244814315, "learning_rate": 9.818687298068454e-06, "loss": 17.707, "step": 6205 }, { "epoch": 0.11344069314712925, "grad_norm": 9.578494114196964, "learning_rate": 9.818608298287004e-06, "loss": 17.9611, "step": 6206 }, { "epoch": 0.11345897234357577, "grad_norm": 8.46016392350176, "learning_rate": 9.81852928161675e-06, "loss": 18.2633, "step": 6207 }, { "epoch": 0.1134772515400223, "grad_norm": 6.596661582164722, "learning_rate": 9.818450248057967e-06, "loss": 17.3136, "step": 6208 }, { "epoch": 0.11349553073646883, "grad_norm": 7.295401665588104, "learning_rate": 9.818371197610935e-06, "loss": 18.0474, "step": 6209 }, { "epoch": 0.11351380993291535, "grad_norm": 7.639919598354987, "learning_rate": 9.81829213027593e-06, "loss": 17.8791, "step": 6210 }, { "epoch": 0.11353208912936187, "grad_norm": 6.652570304730243, "learning_rate": 9.818213046053228e-06, "loss": 17.9262, "step": 6211 }, { "epoch": 0.11355036832580839, "grad_norm": 6.35751685274225, "learning_rate": 9.81813394494311e-06, "loss": 17.6467, "step": 6212 }, { "epoch": 0.11356864752225493, "grad_norm": 6.7906346431503195, "learning_rate": 9.818054826945848e-06, "loss": 17.6755, "step": 6213 }, { "epoch": 0.11358692671870145, "grad_norm": 6.576625503696223, "learning_rate": 9.817975692061723e-06, "loss": 17.6876, "step": 6214 }, { "epoch": 0.11360520591514797, "grad_norm": 7.090944854282477, "learning_rate": 9.817896540291013e-06, "loss": 17.7948, "step": 6215 }, { "epoch": 0.1136234851115945, "grad_norm": 6.508156125368024, "learning_rate": 9.817817371633992e-06, "loss": 17.5299, "step": 6216 }, { "epoch": 0.11364176430804102, "grad_norm": 7.270076954305761, "learning_rate": 9.81773818609094e-06, "loss": 17.8332, "step": 6217 }, { "epoch": 0.11366004350448754, "grad_norm": 6.599159646684854, "learning_rate": 9.817658983662132e-06, "loss": 17.6176, "step": 6218 }, { "epoch": 0.11367832270093407, "grad_norm": 7.421039349125596, "learning_rate": 9.817579764347849e-06, "loss": 17.7094, "step": 6219 }, { "epoch": 0.1136966018973806, "grad_norm": 7.875083752384193, "learning_rate": 9.817500528148365e-06, "loss": 18.0289, "step": 6220 }, { "epoch": 0.11371488109382712, "grad_norm": 7.348782010366022, "learning_rate": 9.817421275063962e-06, "loss": 17.9597, "step": 6221 }, { "epoch": 0.11373316029027364, "grad_norm": 6.3017741227720165, "learning_rate": 9.817342005094915e-06, "loss": 17.4832, "step": 6222 }, { "epoch": 0.11375143948672016, "grad_norm": 7.030648821542163, "learning_rate": 9.8172627182415e-06, "loss": 17.8352, "step": 6223 }, { "epoch": 0.11376971868316668, "grad_norm": 7.744853782477074, "learning_rate": 9.817183414504e-06, "loss": 18.175, "step": 6224 }, { "epoch": 0.11378799787961322, "grad_norm": 6.717189735431535, "learning_rate": 9.81710409388269e-06, "loss": 17.5284, "step": 6225 }, { "epoch": 0.11380627707605974, "grad_norm": 6.592936080943766, "learning_rate": 9.817024756377847e-06, "loss": 17.6753, "step": 6226 }, { "epoch": 0.11382455627250626, "grad_norm": 7.49766059181947, "learning_rate": 9.81694540198975e-06, "loss": 17.4403, "step": 6227 }, { "epoch": 0.11384283546895278, "grad_norm": 8.463039411504896, "learning_rate": 9.81686603071868e-06, "loss": 17.8722, "step": 6228 }, { "epoch": 0.1138611146653993, "grad_norm": 5.78079434548121, "learning_rate": 9.816786642564909e-06, "loss": 17.2611, "step": 6229 }, { "epoch": 0.11387939386184584, "grad_norm": 6.61962245135833, "learning_rate": 9.816707237528719e-06, "loss": 17.7266, "step": 6230 }, { "epoch": 0.11389767305829236, "grad_norm": 7.793304355250502, "learning_rate": 9.81662781561039e-06, "loss": 17.872, "step": 6231 }, { "epoch": 0.11391595225473888, "grad_norm": 6.104698234880663, "learning_rate": 9.816548376810199e-06, "loss": 17.2371, "step": 6232 }, { "epoch": 0.1139342314511854, "grad_norm": 7.53738109773018, "learning_rate": 9.816468921128422e-06, "loss": 17.5866, "step": 6233 }, { "epoch": 0.11395251064763193, "grad_norm": 7.47770185312477, "learning_rate": 9.81638944856534e-06, "loss": 18.0712, "step": 6234 }, { "epoch": 0.11397078984407845, "grad_norm": 9.253215609884657, "learning_rate": 9.816309959121231e-06, "loss": 18.1412, "step": 6235 }, { "epoch": 0.11398906904052498, "grad_norm": 6.569894728303271, "learning_rate": 9.816230452796373e-06, "loss": 17.4748, "step": 6236 }, { "epoch": 0.1140073482369715, "grad_norm": 9.180895486113846, "learning_rate": 9.816150929591046e-06, "loss": 17.4326, "step": 6237 }, { "epoch": 0.11402562743341803, "grad_norm": 7.557170943012073, "learning_rate": 9.816071389505529e-06, "loss": 17.9139, "step": 6238 }, { "epoch": 0.11404390662986455, "grad_norm": 7.487304070854714, "learning_rate": 9.815991832540098e-06, "loss": 17.8918, "step": 6239 }, { "epoch": 0.11406218582631107, "grad_norm": 7.379009782163196, "learning_rate": 9.815912258695034e-06, "loss": 17.9594, "step": 6240 }, { "epoch": 0.1140804650227576, "grad_norm": 6.995329675365152, "learning_rate": 9.815832667970615e-06, "loss": 17.6184, "step": 6241 }, { "epoch": 0.11409874421920413, "grad_norm": 10.00764842834887, "learning_rate": 9.815753060367122e-06, "loss": 18.0156, "step": 6242 }, { "epoch": 0.11411702341565065, "grad_norm": 6.87247120564153, "learning_rate": 9.815673435884831e-06, "loss": 17.6839, "step": 6243 }, { "epoch": 0.11413530261209717, "grad_norm": 6.830404337435096, "learning_rate": 9.815593794524022e-06, "loss": 18.0374, "step": 6244 }, { "epoch": 0.1141535818085437, "grad_norm": 7.639694761758586, "learning_rate": 9.815514136284977e-06, "loss": 17.8666, "step": 6245 }, { "epoch": 0.11417186100499022, "grad_norm": 6.708206899982387, "learning_rate": 9.81543446116797e-06, "loss": 17.6746, "step": 6246 }, { "epoch": 0.11419014020143675, "grad_norm": 6.541288388294831, "learning_rate": 9.815354769173284e-06, "loss": 17.2896, "step": 6247 }, { "epoch": 0.11420841939788327, "grad_norm": 6.76058205704926, "learning_rate": 9.815275060301198e-06, "loss": 17.5915, "step": 6248 }, { "epoch": 0.1142266985943298, "grad_norm": 7.19369988720146, "learning_rate": 9.81519533455199e-06, "loss": 17.7179, "step": 6249 }, { "epoch": 0.11424497779077632, "grad_norm": 7.1423211120451615, "learning_rate": 9.81511559192594e-06, "loss": 17.7816, "step": 6250 }, { "epoch": 0.11426325698722284, "grad_norm": 7.522866178135697, "learning_rate": 9.815035832423329e-06, "loss": 18.0128, "step": 6251 }, { "epoch": 0.11428153618366936, "grad_norm": 7.608876775665035, "learning_rate": 9.814956056044433e-06, "loss": 17.9453, "step": 6252 }, { "epoch": 0.1142998153801159, "grad_norm": 7.774338330810066, "learning_rate": 9.814876262789537e-06, "loss": 18.0482, "step": 6253 }, { "epoch": 0.11431809457656242, "grad_norm": 8.76759266751559, "learning_rate": 9.814796452658915e-06, "loss": 18.689, "step": 6254 }, { "epoch": 0.11433637377300894, "grad_norm": 6.949687430786227, "learning_rate": 9.81471662565285e-06, "loss": 17.6662, "step": 6255 }, { "epoch": 0.11435465296945546, "grad_norm": 7.502716402019781, "learning_rate": 9.814636781771621e-06, "loss": 17.7238, "step": 6256 }, { "epoch": 0.11437293216590198, "grad_norm": 7.893843327867272, "learning_rate": 9.814556921015509e-06, "loss": 18.1044, "step": 6257 }, { "epoch": 0.1143912113623485, "grad_norm": 8.017401807950714, "learning_rate": 9.814477043384791e-06, "loss": 17.8748, "step": 6258 }, { "epoch": 0.11440949055879504, "grad_norm": 6.854928440046626, "learning_rate": 9.814397148879751e-06, "loss": 17.5991, "step": 6259 }, { "epoch": 0.11442776975524156, "grad_norm": 10.515771082598214, "learning_rate": 9.814317237500664e-06, "loss": 18.2981, "step": 6260 }, { "epoch": 0.11444604895168808, "grad_norm": 6.037075326124429, "learning_rate": 9.814237309247814e-06, "loss": 17.3793, "step": 6261 }, { "epoch": 0.1144643281481346, "grad_norm": 6.462855231352157, "learning_rate": 9.81415736412148e-06, "loss": 17.5027, "step": 6262 }, { "epoch": 0.11448260734458113, "grad_norm": 6.400781963784588, "learning_rate": 9.814077402121943e-06, "loss": 17.5447, "step": 6263 }, { "epoch": 0.11450088654102766, "grad_norm": 7.292791950012517, "learning_rate": 9.813997423249482e-06, "loss": 17.9095, "step": 6264 }, { "epoch": 0.11451916573747419, "grad_norm": 7.009803397848102, "learning_rate": 9.813917427504378e-06, "loss": 17.8501, "step": 6265 }, { "epoch": 0.1145374449339207, "grad_norm": 6.570670431757007, "learning_rate": 9.813837414886909e-06, "loss": 17.7744, "step": 6266 }, { "epoch": 0.11455572413036723, "grad_norm": 8.574922859793212, "learning_rate": 9.81375738539736e-06, "loss": 17.9815, "step": 6267 }, { "epoch": 0.11457400332681375, "grad_norm": 6.7876422049688285, "learning_rate": 9.813677339036009e-06, "loss": 17.5424, "step": 6268 }, { "epoch": 0.11459228252326027, "grad_norm": 8.11468122302412, "learning_rate": 9.813597275803135e-06, "loss": 18.5315, "step": 6269 }, { "epoch": 0.11461056171970681, "grad_norm": 6.6613413290634895, "learning_rate": 9.813517195699022e-06, "loss": 17.5249, "step": 6270 }, { "epoch": 0.11462884091615333, "grad_norm": 6.895460372431868, "learning_rate": 9.813437098723948e-06, "loss": 17.5528, "step": 6271 }, { "epoch": 0.11464712011259985, "grad_norm": 6.422300246062773, "learning_rate": 9.813356984878196e-06, "loss": 17.1556, "step": 6272 }, { "epoch": 0.11466539930904637, "grad_norm": 7.886666955754807, "learning_rate": 9.813276854162043e-06, "loss": 18.1806, "step": 6273 }, { "epoch": 0.1146836785054929, "grad_norm": 7.4543683702574794, "learning_rate": 9.813196706575774e-06, "loss": 17.779, "step": 6274 }, { "epoch": 0.11470195770193942, "grad_norm": 7.156585439808601, "learning_rate": 9.813116542119666e-06, "loss": 17.9412, "step": 6275 }, { "epoch": 0.11472023689838595, "grad_norm": 9.138870548783235, "learning_rate": 9.813036360794007e-06, "loss": 18.5025, "step": 6276 }, { "epoch": 0.11473851609483247, "grad_norm": 6.259275145245058, "learning_rate": 9.81295616259907e-06, "loss": 17.3672, "step": 6277 }, { "epoch": 0.114756795291279, "grad_norm": 7.411431181384231, "learning_rate": 9.812875947535138e-06, "loss": 17.8309, "step": 6278 }, { "epoch": 0.11477507448772552, "grad_norm": 7.031345429104616, "learning_rate": 9.812795715602495e-06, "loss": 17.736, "step": 6279 }, { "epoch": 0.11479335368417204, "grad_norm": 6.921873970480513, "learning_rate": 9.812715466801422e-06, "loss": 17.3918, "step": 6280 }, { "epoch": 0.11481163288061857, "grad_norm": 7.009409342805002, "learning_rate": 9.812635201132197e-06, "loss": 17.6489, "step": 6281 }, { "epoch": 0.1148299120770651, "grad_norm": 6.468373114451255, "learning_rate": 9.812554918595103e-06, "loss": 17.5001, "step": 6282 }, { "epoch": 0.11484819127351162, "grad_norm": 6.844055770329313, "learning_rate": 9.812474619190422e-06, "loss": 17.5318, "step": 6283 }, { "epoch": 0.11486647046995814, "grad_norm": 5.93876635730014, "learning_rate": 9.812394302918436e-06, "loss": 17.2376, "step": 6284 }, { "epoch": 0.11488474966640466, "grad_norm": 7.332919688746114, "learning_rate": 9.812313969779426e-06, "loss": 17.7357, "step": 6285 }, { "epoch": 0.11490302886285118, "grad_norm": 7.43211164073885, "learning_rate": 9.812233619773673e-06, "loss": 17.6347, "step": 6286 }, { "epoch": 0.11492130805929772, "grad_norm": 6.855720150266296, "learning_rate": 9.812153252901457e-06, "loss": 17.539, "step": 6287 }, { "epoch": 0.11493958725574424, "grad_norm": 7.8882473645624565, "learning_rate": 9.812072869163063e-06, "loss": 17.6707, "step": 6288 }, { "epoch": 0.11495786645219076, "grad_norm": 9.086128872398263, "learning_rate": 9.811992468558769e-06, "loss": 18.2784, "step": 6289 }, { "epoch": 0.11497614564863728, "grad_norm": 8.208285616076992, "learning_rate": 9.811912051088861e-06, "loss": 18.4073, "step": 6290 }, { "epoch": 0.1149944248450838, "grad_norm": 9.03003262984523, "learning_rate": 9.811831616753618e-06, "loss": 18.3147, "step": 6291 }, { "epoch": 0.11501270404153033, "grad_norm": 7.348070894525367, "learning_rate": 9.811751165553322e-06, "loss": 17.6348, "step": 6292 }, { "epoch": 0.11503098323797686, "grad_norm": 6.825424625846023, "learning_rate": 9.811670697488258e-06, "loss": 17.5614, "step": 6293 }, { "epoch": 0.11504926243442339, "grad_norm": 7.763856822305459, "learning_rate": 9.811590212558704e-06, "loss": 17.9533, "step": 6294 }, { "epoch": 0.11506754163086991, "grad_norm": 6.819436018457505, "learning_rate": 9.811509710764945e-06, "loss": 17.4559, "step": 6295 }, { "epoch": 0.11508582082731643, "grad_norm": 6.698750109017797, "learning_rate": 9.81142919210726e-06, "loss": 17.5214, "step": 6296 }, { "epoch": 0.11510410002376295, "grad_norm": 7.438442475434122, "learning_rate": 9.811348656585936e-06, "loss": 18.0474, "step": 6297 }, { "epoch": 0.11512237922020949, "grad_norm": 7.1820486597397055, "learning_rate": 9.81126810420125e-06, "loss": 17.9241, "step": 6298 }, { "epoch": 0.11514065841665601, "grad_norm": 6.977489322993325, "learning_rate": 9.811187534953488e-06, "loss": 17.8234, "step": 6299 }, { "epoch": 0.11515893761310253, "grad_norm": 6.914916530668801, "learning_rate": 9.811106948842931e-06, "loss": 17.5373, "step": 6300 }, { "epoch": 0.11517721680954905, "grad_norm": 7.063726615733678, "learning_rate": 9.811026345869862e-06, "loss": 17.5611, "step": 6301 }, { "epoch": 0.11519549600599557, "grad_norm": 7.1751863227292905, "learning_rate": 9.810945726034563e-06, "loss": 17.7032, "step": 6302 }, { "epoch": 0.1152137752024421, "grad_norm": 6.682785236121788, "learning_rate": 9.810865089337316e-06, "loss": 17.662, "step": 6303 }, { "epoch": 0.11523205439888863, "grad_norm": 8.435816868510173, "learning_rate": 9.810784435778404e-06, "loss": 18.1695, "step": 6304 }, { "epoch": 0.11525033359533515, "grad_norm": 6.802208917897139, "learning_rate": 9.810703765358111e-06, "loss": 17.7794, "step": 6305 }, { "epoch": 0.11526861279178167, "grad_norm": 8.180548363814108, "learning_rate": 9.810623078076719e-06, "loss": 18.2576, "step": 6306 }, { "epoch": 0.1152868919882282, "grad_norm": 7.930981478452358, "learning_rate": 9.810542373934511e-06, "loss": 18.0729, "step": 6307 }, { "epoch": 0.11530517118467472, "grad_norm": 6.838572780336669, "learning_rate": 9.810461652931768e-06, "loss": 17.4724, "step": 6308 }, { "epoch": 0.11532345038112124, "grad_norm": 8.068676228106554, "learning_rate": 9.810380915068775e-06, "loss": 17.8672, "step": 6309 }, { "epoch": 0.11534172957756778, "grad_norm": 7.196540776256688, "learning_rate": 9.810300160345814e-06, "loss": 17.9999, "step": 6310 }, { "epoch": 0.1153600087740143, "grad_norm": 12.344777323849373, "learning_rate": 9.810219388763168e-06, "loss": 18.4425, "step": 6311 }, { "epoch": 0.11537828797046082, "grad_norm": 6.833460845059853, "learning_rate": 9.810138600321122e-06, "loss": 17.389, "step": 6312 }, { "epoch": 0.11539656716690734, "grad_norm": 7.2852103617293, "learning_rate": 9.810057795019956e-06, "loss": 17.7295, "step": 6313 }, { "epoch": 0.11541484636335386, "grad_norm": 7.220185693867019, "learning_rate": 9.809976972859956e-06, "loss": 18.0016, "step": 6314 }, { "epoch": 0.1154331255598004, "grad_norm": 7.8150757138486115, "learning_rate": 9.809896133841404e-06, "loss": 17.8048, "step": 6315 }, { "epoch": 0.11545140475624692, "grad_norm": 5.557317761478397, "learning_rate": 9.809815277964582e-06, "loss": 17.2288, "step": 6316 }, { "epoch": 0.11546968395269344, "grad_norm": 7.061668131990722, "learning_rate": 9.809734405229776e-06, "loss": 17.369, "step": 6317 }, { "epoch": 0.11548796314913996, "grad_norm": 7.244504281890309, "learning_rate": 9.809653515637268e-06, "loss": 18.0361, "step": 6318 }, { "epoch": 0.11550624234558649, "grad_norm": 8.180238124252234, "learning_rate": 9.809572609187341e-06, "loss": 18.1929, "step": 6319 }, { "epoch": 0.115524521542033, "grad_norm": 7.531069188380508, "learning_rate": 9.80949168588028e-06, "loss": 17.9694, "step": 6320 }, { "epoch": 0.11554280073847954, "grad_norm": 6.1840911491435095, "learning_rate": 9.809410745716367e-06, "loss": 17.4163, "step": 6321 }, { "epoch": 0.11556107993492606, "grad_norm": 5.799217051722377, "learning_rate": 9.80932978869589e-06, "loss": 17.3084, "step": 6322 }, { "epoch": 0.11557935913137259, "grad_norm": 6.4627063889616165, "learning_rate": 9.809248814819126e-06, "loss": 17.5635, "step": 6323 }, { "epoch": 0.11559763832781911, "grad_norm": 7.339022253426556, "learning_rate": 9.809167824086365e-06, "loss": 18.1535, "step": 6324 }, { "epoch": 0.11561591752426563, "grad_norm": 8.484544218942316, "learning_rate": 9.809086816497886e-06, "loss": 18.4038, "step": 6325 }, { "epoch": 0.11563419672071215, "grad_norm": 7.2863558889769875, "learning_rate": 9.809005792053976e-06, "loss": 17.6745, "step": 6326 }, { "epoch": 0.11565247591715869, "grad_norm": 8.3274098521248, "learning_rate": 9.808924750754918e-06, "loss": 17.3246, "step": 6327 }, { "epoch": 0.11567075511360521, "grad_norm": 6.559594966952365, "learning_rate": 9.808843692600995e-06, "loss": 17.5227, "step": 6328 }, { "epoch": 0.11568903431005173, "grad_norm": 6.639002728992262, "learning_rate": 9.808762617592494e-06, "loss": 17.4937, "step": 6329 }, { "epoch": 0.11570731350649825, "grad_norm": 6.373177086925029, "learning_rate": 9.808681525729696e-06, "loss": 17.3868, "step": 6330 }, { "epoch": 0.11572559270294477, "grad_norm": 9.208906527418707, "learning_rate": 9.808600417012886e-06, "loss": 19.1502, "step": 6331 }, { "epoch": 0.11574387189939131, "grad_norm": 6.392016892776988, "learning_rate": 9.80851929144235e-06, "loss": 17.2708, "step": 6332 }, { "epoch": 0.11576215109583783, "grad_norm": 9.832547492016513, "learning_rate": 9.80843814901837e-06, "loss": 18.5383, "step": 6333 }, { "epoch": 0.11578043029228435, "grad_norm": 7.692122147960202, "learning_rate": 9.808356989741231e-06, "loss": 18.2824, "step": 6334 }, { "epoch": 0.11579870948873087, "grad_norm": 7.55166145720147, "learning_rate": 9.80827581361122e-06, "loss": 17.9764, "step": 6335 }, { "epoch": 0.1158169886851774, "grad_norm": 6.364236199278281, "learning_rate": 9.808194620628619e-06, "loss": 17.2458, "step": 6336 }, { "epoch": 0.11583526788162392, "grad_norm": 6.98256771151916, "learning_rate": 9.808113410793713e-06, "loss": 17.7631, "step": 6337 }, { "epoch": 0.11585354707807045, "grad_norm": 8.25863248027287, "learning_rate": 9.808032184106786e-06, "loss": 18.33, "step": 6338 }, { "epoch": 0.11587182627451698, "grad_norm": 6.55601754729298, "learning_rate": 9.807950940568124e-06, "loss": 17.4528, "step": 6339 }, { "epoch": 0.1158901054709635, "grad_norm": 6.976318082824107, "learning_rate": 9.80786968017801e-06, "loss": 17.8263, "step": 6340 }, { "epoch": 0.11590838466741002, "grad_norm": 6.232620318070562, "learning_rate": 9.807788402936732e-06, "loss": 17.338, "step": 6341 }, { "epoch": 0.11592666386385654, "grad_norm": 6.2455593197647925, "learning_rate": 9.807707108844572e-06, "loss": 17.5911, "step": 6342 }, { "epoch": 0.11594494306030306, "grad_norm": 6.7502933548553745, "learning_rate": 9.807625797901817e-06, "loss": 17.7904, "step": 6343 }, { "epoch": 0.1159632222567496, "grad_norm": 7.082728421548852, "learning_rate": 9.807544470108748e-06, "loss": 17.7445, "step": 6344 }, { "epoch": 0.11598150145319612, "grad_norm": 6.19434001023653, "learning_rate": 9.807463125465655e-06, "loss": 17.1742, "step": 6345 }, { "epoch": 0.11599978064964264, "grad_norm": 6.948315780633028, "learning_rate": 9.80738176397282e-06, "loss": 17.83, "step": 6346 }, { "epoch": 0.11601805984608916, "grad_norm": 7.265773545397512, "learning_rate": 9.80730038563053e-06, "loss": 17.7742, "step": 6347 }, { "epoch": 0.11603633904253569, "grad_norm": 6.466850105613359, "learning_rate": 9.807218990439068e-06, "loss": 17.6626, "step": 6348 }, { "epoch": 0.11605461823898222, "grad_norm": 6.787913398772441, "learning_rate": 9.80713757839872e-06, "loss": 17.5217, "step": 6349 }, { "epoch": 0.11607289743542874, "grad_norm": 7.117216201535235, "learning_rate": 9.807056149509775e-06, "loss": 17.6632, "step": 6350 }, { "epoch": 0.11609117663187526, "grad_norm": 5.943020646342875, "learning_rate": 9.806974703772513e-06, "loss": 17.3552, "step": 6351 }, { "epoch": 0.11610945582832179, "grad_norm": 6.792120512788166, "learning_rate": 9.806893241187223e-06, "loss": 17.508, "step": 6352 }, { "epoch": 0.11612773502476831, "grad_norm": 7.289555594562179, "learning_rate": 9.806811761754188e-06, "loss": 17.7113, "step": 6353 }, { "epoch": 0.11614601422121483, "grad_norm": 7.061349160039012, "learning_rate": 9.806730265473694e-06, "loss": 17.7871, "step": 6354 }, { "epoch": 0.11616429341766137, "grad_norm": 7.775418162119797, "learning_rate": 9.806648752346029e-06, "loss": 18.329, "step": 6355 }, { "epoch": 0.11618257261410789, "grad_norm": 5.823594541185173, "learning_rate": 9.806567222371478e-06, "loss": 17.2493, "step": 6356 }, { "epoch": 0.11620085181055441, "grad_norm": 9.340370280309296, "learning_rate": 9.806485675550326e-06, "loss": 18.6586, "step": 6357 }, { "epoch": 0.11621913100700093, "grad_norm": 6.957343250007208, "learning_rate": 9.806404111882857e-06, "loss": 17.7079, "step": 6358 }, { "epoch": 0.11623741020344745, "grad_norm": 7.179960364024654, "learning_rate": 9.80632253136936e-06, "loss": 17.9265, "step": 6359 }, { "epoch": 0.11625568939989397, "grad_norm": 7.021714206245028, "learning_rate": 9.806240934010118e-06, "loss": 17.7393, "step": 6360 }, { "epoch": 0.11627396859634051, "grad_norm": 6.334167341151742, "learning_rate": 9.806159319805421e-06, "loss": 17.5247, "step": 6361 }, { "epoch": 0.11629224779278703, "grad_norm": 7.539256044216627, "learning_rate": 9.80607768875555e-06, "loss": 17.9543, "step": 6362 }, { "epoch": 0.11631052698923355, "grad_norm": 7.686195639813259, "learning_rate": 9.805996040860796e-06, "loss": 18.1302, "step": 6363 }, { "epoch": 0.11632880618568008, "grad_norm": 6.545812377155543, "learning_rate": 9.805914376121443e-06, "loss": 17.5107, "step": 6364 }, { "epoch": 0.1163470853821266, "grad_norm": 7.668924002686527, "learning_rate": 9.805832694537777e-06, "loss": 18.2149, "step": 6365 }, { "epoch": 0.11636536457857313, "grad_norm": 7.675284574438888, "learning_rate": 9.805750996110082e-06, "loss": 18.0789, "step": 6366 }, { "epoch": 0.11638364377501965, "grad_norm": 6.274208863681385, "learning_rate": 9.80566928083865e-06, "loss": 17.5136, "step": 6367 }, { "epoch": 0.11640192297146618, "grad_norm": 6.975382543720655, "learning_rate": 9.805587548723763e-06, "loss": 17.9814, "step": 6368 }, { "epoch": 0.1164202021679127, "grad_norm": 5.754036728902357, "learning_rate": 9.805505799765708e-06, "loss": 17.1506, "step": 6369 }, { "epoch": 0.11643848136435922, "grad_norm": 8.085855456180017, "learning_rate": 9.805424033964773e-06, "loss": 18.0149, "step": 6370 }, { "epoch": 0.11645676056080574, "grad_norm": 6.392223105722064, "learning_rate": 9.805342251321242e-06, "loss": 17.6478, "step": 6371 }, { "epoch": 0.11647503975725228, "grad_norm": 7.042051932028251, "learning_rate": 9.805260451835405e-06, "loss": 17.6313, "step": 6372 }, { "epoch": 0.1164933189536988, "grad_norm": 6.753130957874067, "learning_rate": 9.805178635507547e-06, "loss": 17.733, "step": 6373 }, { "epoch": 0.11651159815014532, "grad_norm": 6.277624375642367, "learning_rate": 9.805096802337954e-06, "loss": 17.4527, "step": 6374 }, { "epoch": 0.11652987734659184, "grad_norm": 8.559251219177527, "learning_rate": 9.805014952326915e-06, "loss": 18.4475, "step": 6375 }, { "epoch": 0.11654815654303836, "grad_norm": 6.427180880428312, "learning_rate": 9.804933085474715e-06, "loss": 17.4267, "step": 6376 }, { "epoch": 0.11656643573948489, "grad_norm": 6.625726581180322, "learning_rate": 9.804851201781641e-06, "loss": 17.6082, "step": 6377 }, { "epoch": 0.11658471493593142, "grad_norm": 6.868704675957353, "learning_rate": 9.80476930124798e-06, "loss": 17.6177, "step": 6378 }, { "epoch": 0.11660299413237794, "grad_norm": 7.023585021843053, "learning_rate": 9.804687383874021e-06, "loss": 17.7069, "step": 6379 }, { "epoch": 0.11662127332882447, "grad_norm": 8.013740996817095, "learning_rate": 9.80460544966005e-06, "loss": 18.3447, "step": 6380 }, { "epoch": 0.11663955252527099, "grad_norm": 7.301934200534333, "learning_rate": 9.804523498606351e-06, "loss": 18.204, "step": 6381 }, { "epoch": 0.11665783172171751, "grad_norm": 8.691675307504308, "learning_rate": 9.804441530713217e-06, "loss": 18.1091, "step": 6382 }, { "epoch": 0.11667611091816404, "grad_norm": 5.254428444675815, "learning_rate": 9.804359545980931e-06, "loss": 16.9172, "step": 6383 }, { "epoch": 0.11669439011461057, "grad_norm": 6.155877543797338, "learning_rate": 9.804277544409782e-06, "loss": 17.4798, "step": 6384 }, { "epoch": 0.11671266931105709, "grad_norm": 6.564385697941725, "learning_rate": 9.804195526000057e-06, "loss": 17.5002, "step": 6385 }, { "epoch": 0.11673094850750361, "grad_norm": 8.185972406187952, "learning_rate": 9.804113490752044e-06, "loss": 18.2783, "step": 6386 }, { "epoch": 0.11674922770395013, "grad_norm": 8.422075775427512, "learning_rate": 9.80403143866603e-06, "loss": 18.0337, "step": 6387 }, { "epoch": 0.11676750690039665, "grad_norm": 7.796217893917706, "learning_rate": 9.803949369742303e-06, "loss": 18.2966, "step": 6388 }, { "epoch": 0.11678578609684319, "grad_norm": 7.3098209433518875, "learning_rate": 9.803867283981149e-06, "loss": 17.8664, "step": 6389 }, { "epoch": 0.11680406529328971, "grad_norm": 7.785805337739391, "learning_rate": 9.803785181382858e-06, "loss": 17.8803, "step": 6390 }, { "epoch": 0.11682234448973623, "grad_norm": 7.111380704612183, "learning_rate": 9.803703061947716e-06, "loss": 17.5372, "step": 6391 }, { "epoch": 0.11684062368618275, "grad_norm": 6.453292618911451, "learning_rate": 9.803620925676011e-06, "loss": 17.5098, "step": 6392 }, { "epoch": 0.11685890288262928, "grad_norm": 8.43288871285481, "learning_rate": 9.803538772568034e-06, "loss": 18.4735, "step": 6393 }, { "epoch": 0.1168771820790758, "grad_norm": 7.025785259728636, "learning_rate": 9.803456602624069e-06, "loss": 17.8423, "step": 6394 }, { "epoch": 0.11689546127552233, "grad_norm": 7.078785596352402, "learning_rate": 9.803374415844406e-06, "loss": 17.6048, "step": 6395 }, { "epoch": 0.11691374047196885, "grad_norm": 7.191447585990892, "learning_rate": 9.803292212229332e-06, "loss": 17.6487, "step": 6396 }, { "epoch": 0.11693201966841538, "grad_norm": 6.977973314987887, "learning_rate": 9.803209991779134e-06, "loss": 17.8112, "step": 6397 }, { "epoch": 0.1169502988648619, "grad_norm": 6.160629282524603, "learning_rate": 9.803127754494105e-06, "loss": 17.5099, "step": 6398 }, { "epoch": 0.11696857806130842, "grad_norm": 6.456529053780158, "learning_rate": 9.803045500374528e-06, "loss": 17.5749, "step": 6399 }, { "epoch": 0.11698685725775496, "grad_norm": 6.625938420317166, "learning_rate": 9.802963229420694e-06, "loss": 17.5932, "step": 6400 }, { "epoch": 0.11700513645420148, "grad_norm": 8.576589550161236, "learning_rate": 9.802880941632891e-06, "loss": 18.4689, "step": 6401 }, { "epoch": 0.117023415650648, "grad_norm": 6.944979006020021, "learning_rate": 9.802798637011406e-06, "loss": 17.1432, "step": 6402 }, { "epoch": 0.11704169484709452, "grad_norm": 6.443120159768358, "learning_rate": 9.802716315556528e-06, "loss": 17.5015, "step": 6403 }, { "epoch": 0.11705997404354104, "grad_norm": 6.125188488621004, "learning_rate": 9.802633977268547e-06, "loss": 17.443, "step": 6404 }, { "epoch": 0.11707825323998756, "grad_norm": 9.200141359939387, "learning_rate": 9.80255162214775e-06, "loss": 18.4959, "step": 6405 }, { "epoch": 0.1170965324364341, "grad_norm": 8.204597681266769, "learning_rate": 9.802469250194429e-06, "loss": 18.3473, "step": 6406 }, { "epoch": 0.11711481163288062, "grad_norm": 6.374708048046521, "learning_rate": 9.802386861408868e-06, "loss": 17.453, "step": 6407 }, { "epoch": 0.11713309082932714, "grad_norm": 8.756956476021765, "learning_rate": 9.802304455791358e-06, "loss": 18.4878, "step": 6408 }, { "epoch": 0.11715137002577367, "grad_norm": 7.145826288671658, "learning_rate": 9.802222033342187e-06, "loss": 17.8972, "step": 6409 }, { "epoch": 0.11716964922222019, "grad_norm": 7.308038267239658, "learning_rate": 9.802139594061645e-06, "loss": 17.4195, "step": 6410 }, { "epoch": 0.11718792841866671, "grad_norm": 6.691694777386175, "learning_rate": 9.80205713795002e-06, "loss": 17.5466, "step": 6411 }, { "epoch": 0.11720620761511324, "grad_norm": 10.977301694384694, "learning_rate": 9.801974665007602e-06, "loss": 18.3623, "step": 6412 }, { "epoch": 0.11722448681155977, "grad_norm": 7.136042291072314, "learning_rate": 9.80189217523468e-06, "loss": 18.1256, "step": 6413 }, { "epoch": 0.11724276600800629, "grad_norm": 7.732945738852875, "learning_rate": 9.801809668631542e-06, "loss": 18.0761, "step": 6414 }, { "epoch": 0.11726104520445281, "grad_norm": 6.625258155185103, "learning_rate": 9.801727145198478e-06, "loss": 17.6355, "step": 6415 }, { "epoch": 0.11727932440089933, "grad_norm": 7.129781251303618, "learning_rate": 9.801644604935776e-06, "loss": 17.8709, "step": 6416 }, { "epoch": 0.11729760359734587, "grad_norm": 6.751901967124584, "learning_rate": 9.801562047843727e-06, "loss": 17.5563, "step": 6417 }, { "epoch": 0.11731588279379239, "grad_norm": 7.896663379369354, "learning_rate": 9.80147947392262e-06, "loss": 18.2147, "step": 6418 }, { "epoch": 0.11733416199023891, "grad_norm": 6.829852354286273, "learning_rate": 9.801396883172744e-06, "loss": 17.8405, "step": 6419 }, { "epoch": 0.11735244118668543, "grad_norm": 8.027455516082357, "learning_rate": 9.801314275594389e-06, "loss": 17.7207, "step": 6420 }, { "epoch": 0.11737072038313195, "grad_norm": 5.83849562073646, "learning_rate": 9.801231651187844e-06, "loss": 17.2513, "step": 6421 }, { "epoch": 0.11738899957957848, "grad_norm": 6.498057274825543, "learning_rate": 9.801149009953397e-06, "loss": 17.5261, "step": 6422 }, { "epoch": 0.11740727877602501, "grad_norm": 6.481551183585061, "learning_rate": 9.801066351891341e-06, "loss": 17.5379, "step": 6423 }, { "epoch": 0.11742555797247153, "grad_norm": 8.404955598684786, "learning_rate": 9.800983677001962e-06, "loss": 17.6598, "step": 6424 }, { "epoch": 0.11744383716891806, "grad_norm": 8.42212805213432, "learning_rate": 9.800900985285554e-06, "loss": 18.046, "step": 6425 }, { "epoch": 0.11746211636536458, "grad_norm": 7.176008702550471, "learning_rate": 9.800818276742405e-06, "loss": 17.7202, "step": 6426 }, { "epoch": 0.1174803955618111, "grad_norm": 8.417184757751102, "learning_rate": 9.800735551372804e-06, "loss": 18.2777, "step": 6427 }, { "epoch": 0.11749867475825762, "grad_norm": 6.548088527082842, "learning_rate": 9.80065280917704e-06, "loss": 17.5299, "step": 6428 }, { "epoch": 0.11751695395470416, "grad_norm": 10.397837457025595, "learning_rate": 9.800570050155406e-06, "loss": 18.4056, "step": 6429 }, { "epoch": 0.11753523315115068, "grad_norm": 6.12273016604697, "learning_rate": 9.800487274308191e-06, "loss": 17.3399, "step": 6430 }, { "epoch": 0.1175535123475972, "grad_norm": 7.357488772562387, "learning_rate": 9.800404481635683e-06, "loss": 18.1048, "step": 6431 }, { "epoch": 0.11757179154404372, "grad_norm": 7.470281827265112, "learning_rate": 9.800321672138176e-06, "loss": 17.644, "step": 6432 }, { "epoch": 0.11759007074049024, "grad_norm": 7.999336679854122, "learning_rate": 9.800238845815956e-06, "loss": 18.5439, "step": 6433 }, { "epoch": 0.11760834993693678, "grad_norm": 8.806640978534022, "learning_rate": 9.800156002669317e-06, "loss": 18.4895, "step": 6434 }, { "epoch": 0.1176266291333833, "grad_norm": 7.52573875475446, "learning_rate": 9.800073142698545e-06, "loss": 18.1755, "step": 6435 }, { "epoch": 0.11764490832982982, "grad_norm": 6.675257678545468, "learning_rate": 9.799990265903936e-06, "loss": 17.564, "step": 6436 }, { "epoch": 0.11766318752627634, "grad_norm": 7.144905545526197, "learning_rate": 9.799907372285778e-06, "loss": 17.9521, "step": 6437 }, { "epoch": 0.11768146672272287, "grad_norm": 6.362649613459145, "learning_rate": 9.799824461844358e-06, "loss": 17.2563, "step": 6438 }, { "epoch": 0.11769974591916939, "grad_norm": 6.904921285489309, "learning_rate": 9.799741534579972e-06, "loss": 17.6227, "step": 6439 }, { "epoch": 0.11771802511561592, "grad_norm": 7.443050796729453, "learning_rate": 9.799658590492909e-06, "loss": 17.8672, "step": 6440 }, { "epoch": 0.11773630431206245, "grad_norm": 6.956501862183829, "learning_rate": 9.799575629583457e-06, "loss": 17.602, "step": 6441 }, { "epoch": 0.11775458350850897, "grad_norm": 7.315264576892508, "learning_rate": 9.79949265185191e-06, "loss": 17.796, "step": 6442 }, { "epoch": 0.11777286270495549, "grad_norm": 7.571036138757203, "learning_rate": 9.799409657298559e-06, "loss": 18.1444, "step": 6443 }, { "epoch": 0.11779114190140201, "grad_norm": 6.906011070661719, "learning_rate": 9.799326645923692e-06, "loss": 17.7722, "step": 6444 }, { "epoch": 0.11780942109784853, "grad_norm": 6.901236194432724, "learning_rate": 9.799243617727603e-06, "loss": 17.7963, "step": 6445 }, { "epoch": 0.11782770029429507, "grad_norm": 8.612221112737895, "learning_rate": 9.79916057271058e-06, "loss": 18.0595, "step": 6446 }, { "epoch": 0.11784597949074159, "grad_norm": 8.554722330225086, "learning_rate": 9.799077510872916e-06, "loss": 18.0822, "step": 6447 }, { "epoch": 0.11786425868718811, "grad_norm": 6.5271612616553, "learning_rate": 9.798994432214901e-06, "loss": 17.4995, "step": 6448 }, { "epoch": 0.11788253788363463, "grad_norm": 6.553220588831177, "learning_rate": 9.798911336736829e-06, "loss": 17.3902, "step": 6449 }, { "epoch": 0.11790081708008116, "grad_norm": 7.953352621405939, "learning_rate": 9.79882822443899e-06, "loss": 18.1591, "step": 6450 }, { "epoch": 0.11791909627652769, "grad_norm": 7.450150154473341, "learning_rate": 9.79874509532167e-06, "loss": 17.8105, "step": 6451 }, { "epoch": 0.11793737547297421, "grad_norm": 7.344572475847884, "learning_rate": 9.79866194938517e-06, "loss": 17.9767, "step": 6452 }, { "epoch": 0.11795565466942073, "grad_norm": 8.533770454549908, "learning_rate": 9.798578786629774e-06, "loss": 18.6686, "step": 6453 }, { "epoch": 0.11797393386586726, "grad_norm": 6.325517074232184, "learning_rate": 9.798495607055773e-06, "loss": 17.2783, "step": 6454 }, { "epoch": 0.11799221306231378, "grad_norm": 8.253855300487562, "learning_rate": 9.798412410663466e-06, "loss": 18.3235, "step": 6455 }, { "epoch": 0.1180104922587603, "grad_norm": 8.596405157604142, "learning_rate": 9.798329197453136e-06, "loss": 18.1615, "step": 6456 }, { "epoch": 0.11802877145520684, "grad_norm": 5.917307156883019, "learning_rate": 9.798245967425081e-06, "loss": 17.3634, "step": 6457 }, { "epoch": 0.11804705065165336, "grad_norm": 8.188917822781782, "learning_rate": 9.79816272057959e-06, "loss": 18.2016, "step": 6458 }, { "epoch": 0.11806532984809988, "grad_norm": 6.486414674149526, "learning_rate": 9.798079456916954e-06, "loss": 17.3954, "step": 6459 }, { "epoch": 0.1180836090445464, "grad_norm": 7.555090203044301, "learning_rate": 9.797996176437467e-06, "loss": 17.7253, "step": 6460 }, { "epoch": 0.11810188824099292, "grad_norm": 8.175442463062188, "learning_rate": 9.79791287914142e-06, "loss": 17.8376, "step": 6461 }, { "epoch": 0.11812016743743944, "grad_norm": 6.995608011331642, "learning_rate": 9.797829565029103e-06, "loss": 17.6152, "step": 6462 }, { "epoch": 0.11813844663388598, "grad_norm": 6.1874870425570165, "learning_rate": 9.797746234100811e-06, "loss": 17.4161, "step": 6463 }, { "epoch": 0.1181567258303325, "grad_norm": 7.507360658716251, "learning_rate": 9.797662886356833e-06, "loss": 17.953, "step": 6464 }, { "epoch": 0.11817500502677902, "grad_norm": 6.90773495193432, "learning_rate": 9.797579521797466e-06, "loss": 17.7612, "step": 6465 }, { "epoch": 0.11819328422322554, "grad_norm": 6.636870108478468, "learning_rate": 9.797496140422997e-06, "loss": 17.5357, "step": 6466 }, { "epoch": 0.11821156341967207, "grad_norm": 8.033725427160187, "learning_rate": 9.797412742233721e-06, "loss": 17.9367, "step": 6467 }, { "epoch": 0.1182298426161186, "grad_norm": 7.471387238579187, "learning_rate": 9.79732932722993e-06, "loss": 18.1184, "step": 6468 }, { "epoch": 0.11824812181256512, "grad_norm": 6.63298260552759, "learning_rate": 9.797245895411915e-06, "loss": 17.6509, "step": 6469 }, { "epoch": 0.11826640100901165, "grad_norm": 7.265387902671119, "learning_rate": 9.797162446779969e-06, "loss": 17.6117, "step": 6470 }, { "epoch": 0.11828468020545817, "grad_norm": 7.474828687299429, "learning_rate": 9.797078981334386e-06, "loss": 18.049, "step": 6471 }, { "epoch": 0.11830295940190469, "grad_norm": 6.423152761167049, "learning_rate": 9.796995499075457e-06, "loss": 17.5033, "step": 6472 }, { "epoch": 0.11832123859835121, "grad_norm": 6.747138888401617, "learning_rate": 9.796912000003475e-06, "loss": 17.7517, "step": 6473 }, { "epoch": 0.11833951779479775, "grad_norm": 7.160861374191156, "learning_rate": 9.796828484118734e-06, "loss": 17.9269, "step": 6474 }, { "epoch": 0.11835779699124427, "grad_norm": 6.613910339644413, "learning_rate": 9.796744951421524e-06, "loss": 17.4928, "step": 6475 }, { "epoch": 0.11837607618769079, "grad_norm": 8.225071522638741, "learning_rate": 9.796661401912138e-06, "loss": 18.0207, "step": 6476 }, { "epoch": 0.11839435538413731, "grad_norm": 7.226856116001023, "learning_rate": 9.796577835590873e-06, "loss": 17.8802, "step": 6477 }, { "epoch": 0.11841263458058383, "grad_norm": 6.713185030496041, "learning_rate": 9.796494252458018e-06, "loss": 17.7731, "step": 6478 }, { "epoch": 0.11843091377703036, "grad_norm": 7.02622367697721, "learning_rate": 9.796410652513866e-06, "loss": 18.0381, "step": 6479 }, { "epoch": 0.11844919297347689, "grad_norm": 7.305191679821685, "learning_rate": 9.79632703575871e-06, "loss": 17.9136, "step": 6480 }, { "epoch": 0.11846747216992341, "grad_norm": 7.283521586820067, "learning_rate": 9.796243402192845e-06, "loss": 17.5094, "step": 6481 }, { "epoch": 0.11848575136636993, "grad_norm": 7.068732044920499, "learning_rate": 9.796159751816563e-06, "loss": 18.1282, "step": 6482 }, { "epoch": 0.11850403056281646, "grad_norm": 8.194073750688434, "learning_rate": 9.796076084630157e-06, "loss": 17.8692, "step": 6483 }, { "epoch": 0.11852230975926298, "grad_norm": 6.827089857509455, "learning_rate": 9.795992400633923e-06, "loss": 17.8544, "step": 6484 }, { "epoch": 0.11854058895570951, "grad_norm": 7.3389057459812, "learning_rate": 9.79590869982815e-06, "loss": 17.486, "step": 6485 }, { "epoch": 0.11855886815215604, "grad_norm": 6.334095457705487, "learning_rate": 9.795824982213132e-06, "loss": 17.6059, "step": 6486 }, { "epoch": 0.11857714734860256, "grad_norm": 5.770866119526604, "learning_rate": 9.795741247789164e-06, "loss": 17.2927, "step": 6487 }, { "epoch": 0.11859542654504908, "grad_norm": 6.7825318697015105, "learning_rate": 9.79565749655654e-06, "loss": 17.5687, "step": 6488 }, { "epoch": 0.1186137057414956, "grad_norm": 7.812693039624006, "learning_rate": 9.795573728515553e-06, "loss": 18.2304, "step": 6489 }, { "epoch": 0.11863198493794212, "grad_norm": 6.52843103040547, "learning_rate": 9.795489943666494e-06, "loss": 17.6061, "step": 6490 }, { "epoch": 0.11865026413438866, "grad_norm": 6.42790272523629, "learning_rate": 9.79540614200966e-06, "loss": 17.6545, "step": 6491 }, { "epoch": 0.11866854333083518, "grad_norm": 7.890960366904658, "learning_rate": 9.795322323545345e-06, "loss": 17.8171, "step": 6492 }, { "epoch": 0.1186868225272817, "grad_norm": 8.097096802324884, "learning_rate": 9.795238488273841e-06, "loss": 18.2555, "step": 6493 }, { "epoch": 0.11870510172372822, "grad_norm": 8.405031935416428, "learning_rate": 9.79515463619544e-06, "loss": 18.142, "step": 6494 }, { "epoch": 0.11872338092017475, "grad_norm": 6.724443918721763, "learning_rate": 9.79507076731044e-06, "loss": 17.6722, "step": 6495 }, { "epoch": 0.11874166011662127, "grad_norm": 7.80273706210259, "learning_rate": 9.794986881619132e-06, "loss": 18.2217, "step": 6496 }, { "epoch": 0.1187599393130678, "grad_norm": 6.818244420135292, "learning_rate": 9.794902979121813e-06, "loss": 17.4039, "step": 6497 }, { "epoch": 0.11877821850951432, "grad_norm": 7.656089440816266, "learning_rate": 9.794819059818775e-06, "loss": 17.6009, "step": 6498 }, { "epoch": 0.11879649770596085, "grad_norm": 7.1953928996191046, "learning_rate": 9.794735123710311e-06, "loss": 17.8115, "step": 6499 }, { "epoch": 0.11881477690240737, "grad_norm": 7.191384965477584, "learning_rate": 9.794651170796717e-06, "loss": 17.7287, "step": 6500 }, { "epoch": 0.11883305609885389, "grad_norm": 5.405317409410683, "learning_rate": 9.794567201078284e-06, "loss": 16.9769, "step": 6501 }, { "epoch": 0.11885133529530043, "grad_norm": 5.916881991659906, "learning_rate": 9.794483214555313e-06, "loss": 17.3112, "step": 6502 }, { "epoch": 0.11886961449174695, "grad_norm": 8.955135080248823, "learning_rate": 9.794399211228092e-06, "loss": 18.3316, "step": 6503 }, { "epoch": 0.11888789368819347, "grad_norm": 7.922817280931787, "learning_rate": 9.79431519109692e-06, "loss": 18.2057, "step": 6504 }, { "epoch": 0.11890617288463999, "grad_norm": 7.3757294383020975, "learning_rate": 9.794231154162087e-06, "loss": 17.7756, "step": 6505 }, { "epoch": 0.11892445208108651, "grad_norm": 6.700722597819988, "learning_rate": 9.79414710042389e-06, "loss": 17.5947, "step": 6506 }, { "epoch": 0.11894273127753303, "grad_norm": 8.167591367137772, "learning_rate": 9.794063029882625e-06, "loss": 18.071, "step": 6507 }, { "epoch": 0.11896101047397957, "grad_norm": 7.338127437379368, "learning_rate": 9.793978942538583e-06, "loss": 17.724, "step": 6508 }, { "epoch": 0.11897928967042609, "grad_norm": 7.703296730988331, "learning_rate": 9.793894838392062e-06, "loss": 17.9528, "step": 6509 }, { "epoch": 0.11899756886687261, "grad_norm": 6.490671664928666, "learning_rate": 9.793810717443356e-06, "loss": 17.4084, "step": 6510 }, { "epoch": 0.11901584806331914, "grad_norm": 10.372900084340094, "learning_rate": 9.79372657969276e-06, "loss": 19.1718, "step": 6511 }, { "epoch": 0.11903412725976566, "grad_norm": 9.095616702660106, "learning_rate": 9.793642425140567e-06, "loss": 18.216, "step": 6512 }, { "epoch": 0.11905240645621218, "grad_norm": 9.791638205380364, "learning_rate": 9.793558253787072e-06, "loss": 19.3034, "step": 6513 }, { "epoch": 0.11907068565265871, "grad_norm": 6.938984583399672, "learning_rate": 9.793474065632574e-06, "loss": 17.6442, "step": 6514 }, { "epoch": 0.11908896484910524, "grad_norm": 6.617292142701157, "learning_rate": 9.793389860677364e-06, "loss": 17.734, "step": 6515 }, { "epoch": 0.11910724404555176, "grad_norm": 8.419519258940735, "learning_rate": 9.793305638921738e-06, "loss": 18.0048, "step": 6516 }, { "epoch": 0.11912552324199828, "grad_norm": 7.748242283614979, "learning_rate": 9.793221400365993e-06, "loss": 17.7052, "step": 6517 }, { "epoch": 0.1191438024384448, "grad_norm": 8.192685354082709, "learning_rate": 9.793137145010423e-06, "loss": 18.4234, "step": 6518 }, { "epoch": 0.11916208163489134, "grad_norm": 6.9733892634945995, "learning_rate": 9.793052872855322e-06, "loss": 17.5065, "step": 6519 }, { "epoch": 0.11918036083133786, "grad_norm": 7.444446036851159, "learning_rate": 9.792968583900988e-06, "loss": 17.8992, "step": 6520 }, { "epoch": 0.11919864002778438, "grad_norm": 6.807499423338599, "learning_rate": 9.792884278147714e-06, "loss": 17.6398, "step": 6521 }, { "epoch": 0.1192169192242309, "grad_norm": 6.096419198087533, "learning_rate": 9.792799955595796e-06, "loss": 17.264, "step": 6522 }, { "epoch": 0.11923519842067742, "grad_norm": 6.42210350027216, "learning_rate": 9.792715616245532e-06, "loss": 17.5809, "step": 6523 }, { "epoch": 0.11925347761712395, "grad_norm": 7.436633208657352, "learning_rate": 9.792631260097214e-06, "loss": 17.7595, "step": 6524 }, { "epoch": 0.11927175681357048, "grad_norm": 9.280548299826297, "learning_rate": 9.79254688715114e-06, "loss": 18.5013, "step": 6525 }, { "epoch": 0.119290036010017, "grad_norm": 8.279025851654675, "learning_rate": 9.792462497407604e-06, "loss": 17.7648, "step": 6526 }, { "epoch": 0.11930831520646352, "grad_norm": 8.371288874130814, "learning_rate": 9.792378090866904e-06, "loss": 18.0224, "step": 6527 }, { "epoch": 0.11932659440291005, "grad_norm": 6.9459894384664755, "learning_rate": 9.792293667529334e-06, "loss": 17.8487, "step": 6528 }, { "epoch": 0.11934487359935657, "grad_norm": 5.985626661647108, "learning_rate": 9.79220922739519e-06, "loss": 17.2801, "step": 6529 }, { "epoch": 0.11936315279580309, "grad_norm": 7.520338406965312, "learning_rate": 9.79212477046477e-06, "loss": 17.7614, "step": 6530 }, { "epoch": 0.11938143199224963, "grad_norm": 8.01768301805792, "learning_rate": 9.792040296738367e-06, "loss": 18.0922, "step": 6531 }, { "epoch": 0.11939971118869615, "grad_norm": 6.270714170837754, "learning_rate": 9.79195580621628e-06, "loss": 17.7132, "step": 6532 }, { "epoch": 0.11941799038514267, "grad_norm": 7.937258621373097, "learning_rate": 9.791871298898804e-06, "loss": 18.1272, "step": 6533 }, { "epoch": 0.11943626958158919, "grad_norm": 8.66782154213655, "learning_rate": 9.791786774786234e-06, "loss": 18.029, "step": 6534 }, { "epoch": 0.11945454877803571, "grad_norm": 6.578285988582351, "learning_rate": 9.791702233878867e-06, "loss": 17.6913, "step": 6535 }, { "epoch": 0.11947282797448225, "grad_norm": 6.336874696532593, "learning_rate": 9.791617676176999e-06, "loss": 17.4965, "step": 6536 }, { "epoch": 0.11949110717092877, "grad_norm": 7.599347049717939, "learning_rate": 9.791533101680928e-06, "loss": 18.0085, "step": 6537 }, { "epoch": 0.11950938636737529, "grad_norm": 5.948878988910011, "learning_rate": 9.791448510390948e-06, "loss": 17.3512, "step": 6538 }, { "epoch": 0.11952766556382181, "grad_norm": 6.822620871610262, "learning_rate": 9.791363902307357e-06, "loss": 17.5646, "step": 6539 }, { "epoch": 0.11954594476026834, "grad_norm": 8.442392421658504, "learning_rate": 9.791279277430453e-06, "loss": 18.2527, "step": 6540 }, { "epoch": 0.11956422395671486, "grad_norm": 7.432941474388194, "learning_rate": 9.79119463576053e-06, "loss": 17.8236, "step": 6541 }, { "epoch": 0.11958250315316139, "grad_norm": 8.100810925325277, "learning_rate": 9.791109977297886e-06, "loss": 18.1798, "step": 6542 }, { "epoch": 0.11960078234960791, "grad_norm": 6.695797003240366, "learning_rate": 9.791025302042816e-06, "loss": 17.7148, "step": 6543 }, { "epoch": 0.11961906154605444, "grad_norm": 7.120505481509592, "learning_rate": 9.790940609995618e-06, "loss": 17.9138, "step": 6544 }, { "epoch": 0.11963734074250096, "grad_norm": 7.296967713881201, "learning_rate": 9.79085590115659e-06, "loss": 17.8762, "step": 6545 }, { "epoch": 0.11965561993894748, "grad_norm": 7.328045571999824, "learning_rate": 9.790771175526028e-06, "loss": 17.5075, "step": 6546 }, { "epoch": 0.119673899135394, "grad_norm": 6.998914971455784, "learning_rate": 9.790686433104229e-06, "loss": 17.4713, "step": 6547 }, { "epoch": 0.11969217833184054, "grad_norm": 8.160249802553942, "learning_rate": 9.790601673891488e-06, "loss": 18.2393, "step": 6548 }, { "epoch": 0.11971045752828706, "grad_norm": 6.258968282972264, "learning_rate": 9.790516897888105e-06, "loss": 17.227, "step": 6549 }, { "epoch": 0.11972873672473358, "grad_norm": 6.358986838184495, "learning_rate": 9.790432105094376e-06, "loss": 17.3255, "step": 6550 }, { "epoch": 0.1197470159211801, "grad_norm": 6.693172918968336, "learning_rate": 9.790347295510597e-06, "loss": 17.4555, "step": 6551 }, { "epoch": 0.11976529511762662, "grad_norm": 6.735622330137925, "learning_rate": 9.790262469137068e-06, "loss": 17.8505, "step": 6552 }, { "epoch": 0.11978357431407316, "grad_norm": 7.432447853508037, "learning_rate": 9.790177625974084e-06, "loss": 17.8742, "step": 6553 }, { "epoch": 0.11980185351051968, "grad_norm": 7.732242969332778, "learning_rate": 9.790092766021943e-06, "loss": 18.0215, "step": 6554 }, { "epoch": 0.1198201327069662, "grad_norm": 7.625990731517096, "learning_rate": 9.790007889280942e-06, "loss": 18.2303, "step": 6555 }, { "epoch": 0.11983841190341273, "grad_norm": 7.136837631975101, "learning_rate": 9.78992299575138e-06, "loss": 18.2816, "step": 6556 }, { "epoch": 0.11985669109985925, "grad_norm": 8.04377617285793, "learning_rate": 9.789838085433554e-06, "loss": 18.6148, "step": 6557 }, { "epoch": 0.11987497029630577, "grad_norm": 7.731295894701705, "learning_rate": 9.78975315832776e-06, "loss": 18.208, "step": 6558 }, { "epoch": 0.1198932494927523, "grad_norm": 7.052510981025374, "learning_rate": 9.789668214434296e-06, "loss": 17.6294, "step": 6559 }, { "epoch": 0.11991152868919883, "grad_norm": 8.321861239998073, "learning_rate": 9.789583253753463e-06, "loss": 17.7319, "step": 6560 }, { "epoch": 0.11992980788564535, "grad_norm": 6.498838604629319, "learning_rate": 9.789498276285554e-06, "loss": 17.6915, "step": 6561 }, { "epoch": 0.11994808708209187, "grad_norm": 7.443228388632784, "learning_rate": 9.78941328203087e-06, "loss": 17.7216, "step": 6562 }, { "epoch": 0.11996636627853839, "grad_norm": 6.529059873526048, "learning_rate": 9.789328270989709e-06, "loss": 17.5777, "step": 6563 }, { "epoch": 0.11998464547498491, "grad_norm": 6.723451701396521, "learning_rate": 9.789243243162368e-06, "loss": 17.6907, "step": 6564 }, { "epoch": 0.12000292467143145, "grad_norm": 6.8791003187608855, "learning_rate": 9.789158198549142e-06, "loss": 17.7716, "step": 6565 }, { "epoch": 0.12002120386787797, "grad_norm": 7.197813593107489, "learning_rate": 9.789073137150335e-06, "loss": 18.0089, "step": 6566 }, { "epoch": 0.12003948306432449, "grad_norm": 7.657156468405905, "learning_rate": 9.788988058966242e-06, "loss": 17.79, "step": 6567 }, { "epoch": 0.12005776226077101, "grad_norm": 7.010305958903088, "learning_rate": 9.788902963997161e-06, "loss": 17.4947, "step": 6568 }, { "epoch": 0.12007604145721754, "grad_norm": 7.652939229137941, "learning_rate": 9.78881785224339e-06, "loss": 18.1834, "step": 6569 }, { "epoch": 0.12009432065366407, "grad_norm": 7.3832717808589905, "learning_rate": 9.78873272370523e-06, "loss": 17.7098, "step": 6570 }, { "epoch": 0.1201125998501106, "grad_norm": 6.797808442800341, "learning_rate": 9.788647578382975e-06, "loss": 17.3152, "step": 6571 }, { "epoch": 0.12013087904655712, "grad_norm": 9.297017300811333, "learning_rate": 9.788562416276928e-06, "loss": 18.6175, "step": 6572 }, { "epoch": 0.12014915824300364, "grad_norm": 7.311421378016831, "learning_rate": 9.788477237387384e-06, "loss": 17.615, "step": 6573 }, { "epoch": 0.12016743743945016, "grad_norm": 6.93790451572851, "learning_rate": 9.788392041714642e-06, "loss": 17.9713, "step": 6574 }, { "epoch": 0.12018571663589668, "grad_norm": 7.290709844297043, "learning_rate": 9.788306829259002e-06, "loss": 17.7142, "step": 6575 }, { "epoch": 0.12020399583234322, "grad_norm": 6.99043375359672, "learning_rate": 9.788221600020763e-06, "loss": 17.4934, "step": 6576 }, { "epoch": 0.12022227502878974, "grad_norm": 8.057731335287574, "learning_rate": 9.788136354000221e-06, "loss": 18.308, "step": 6577 }, { "epoch": 0.12024055422523626, "grad_norm": 9.220128603302165, "learning_rate": 9.788051091197679e-06, "loss": 18.803, "step": 6578 }, { "epoch": 0.12025883342168278, "grad_norm": 7.180361320473192, "learning_rate": 9.78796581161343e-06, "loss": 17.7055, "step": 6579 }, { "epoch": 0.1202771126181293, "grad_norm": 7.202360795240422, "learning_rate": 9.78788051524778e-06, "loss": 17.6053, "step": 6580 }, { "epoch": 0.12029539181457582, "grad_norm": 7.949602049517095, "learning_rate": 9.787795202101022e-06, "loss": 18.6818, "step": 6581 }, { "epoch": 0.12031367101102236, "grad_norm": 7.290520032674815, "learning_rate": 9.787709872173459e-06, "loss": 17.8462, "step": 6582 }, { "epoch": 0.12033195020746888, "grad_norm": 7.486266145220681, "learning_rate": 9.787624525465386e-06, "loss": 17.8674, "step": 6583 }, { "epoch": 0.1203502294039154, "grad_norm": 8.300157535115929, "learning_rate": 9.787539161977107e-06, "loss": 18.4809, "step": 6584 }, { "epoch": 0.12036850860036193, "grad_norm": 7.249217641057837, "learning_rate": 9.787453781708918e-06, "loss": 17.7896, "step": 6585 }, { "epoch": 0.12038678779680845, "grad_norm": 8.136674193153222, "learning_rate": 9.787368384661117e-06, "loss": 18.0296, "step": 6586 }, { "epoch": 0.12040506699325498, "grad_norm": 7.170638263949791, "learning_rate": 9.787282970834008e-06, "loss": 17.782, "step": 6587 }, { "epoch": 0.1204233461897015, "grad_norm": 7.385747835699894, "learning_rate": 9.787197540227887e-06, "loss": 17.8215, "step": 6588 }, { "epoch": 0.12044162538614803, "grad_norm": 7.774225347474626, "learning_rate": 9.787112092843052e-06, "loss": 18.4174, "step": 6589 }, { "epoch": 0.12045990458259455, "grad_norm": 6.440299019895617, "learning_rate": 9.787026628679806e-06, "loss": 17.5166, "step": 6590 }, { "epoch": 0.12047818377904107, "grad_norm": 6.0228606643020495, "learning_rate": 9.786941147738446e-06, "loss": 17.2161, "step": 6591 }, { "epoch": 0.12049646297548759, "grad_norm": 8.424455245304133, "learning_rate": 9.786855650019275e-06, "loss": 18.4105, "step": 6592 }, { "epoch": 0.12051474217193413, "grad_norm": 6.116248050842922, "learning_rate": 9.78677013552259e-06, "loss": 17.1726, "step": 6593 }, { "epoch": 0.12053302136838065, "grad_norm": 7.7242918289890925, "learning_rate": 9.786684604248688e-06, "loss": 18.0852, "step": 6594 }, { "epoch": 0.12055130056482717, "grad_norm": 6.654921197561543, "learning_rate": 9.786599056197874e-06, "loss": 17.6023, "step": 6595 }, { "epoch": 0.12056957976127369, "grad_norm": 6.641258300875004, "learning_rate": 9.786513491370446e-06, "loss": 17.6764, "step": 6596 }, { "epoch": 0.12058785895772021, "grad_norm": 7.115677003135941, "learning_rate": 9.786427909766703e-06, "loss": 17.7292, "step": 6597 }, { "epoch": 0.12060613815416674, "grad_norm": 7.760198261135722, "learning_rate": 9.786342311386946e-06, "loss": 18.1984, "step": 6598 }, { "epoch": 0.12062441735061327, "grad_norm": 6.9906326340819955, "learning_rate": 9.786256696231473e-06, "loss": 17.7639, "step": 6599 }, { "epoch": 0.1206426965470598, "grad_norm": 7.08097195907497, "learning_rate": 9.786171064300587e-06, "loss": 17.4673, "step": 6600 }, { "epoch": 0.12066097574350632, "grad_norm": 7.867628079964912, "learning_rate": 9.786085415594588e-06, "loss": 17.9685, "step": 6601 }, { "epoch": 0.12067925493995284, "grad_norm": 8.08039809075577, "learning_rate": 9.785999750113772e-06, "loss": 18.3153, "step": 6602 }, { "epoch": 0.12069753413639936, "grad_norm": 6.4718450271338375, "learning_rate": 9.785914067858444e-06, "loss": 17.5679, "step": 6603 }, { "epoch": 0.1207158133328459, "grad_norm": 7.613223665863531, "learning_rate": 9.785828368828903e-06, "loss": 18.1562, "step": 6604 }, { "epoch": 0.12073409252929242, "grad_norm": 6.654676585758407, "learning_rate": 9.785742653025448e-06, "loss": 17.5961, "step": 6605 }, { "epoch": 0.12075237172573894, "grad_norm": 7.751378363143706, "learning_rate": 9.78565692044838e-06, "loss": 18.3553, "step": 6606 }, { "epoch": 0.12077065092218546, "grad_norm": 7.724338371565218, "learning_rate": 9.785571171098e-06, "loss": 17.8555, "step": 6607 }, { "epoch": 0.12078893011863198, "grad_norm": 7.7391000573388515, "learning_rate": 9.785485404974608e-06, "loss": 18.285, "step": 6608 }, { "epoch": 0.1208072093150785, "grad_norm": 6.503173067269977, "learning_rate": 9.785399622078505e-06, "loss": 17.5102, "step": 6609 }, { "epoch": 0.12082548851152504, "grad_norm": 9.305663182717261, "learning_rate": 9.785313822409992e-06, "loss": 17.6926, "step": 6610 }, { "epoch": 0.12084376770797156, "grad_norm": 8.544767880225772, "learning_rate": 9.785228005969369e-06, "loss": 18.3077, "step": 6611 }, { "epoch": 0.12086204690441808, "grad_norm": 7.273019054278335, "learning_rate": 9.785142172756937e-06, "loss": 17.7531, "step": 6612 }, { "epoch": 0.1208803261008646, "grad_norm": 9.788212676017627, "learning_rate": 9.785056322772997e-06, "loss": 18.5349, "step": 6613 }, { "epoch": 0.12089860529731113, "grad_norm": 6.958116567792477, "learning_rate": 9.784970456017851e-06, "loss": 17.5714, "step": 6614 }, { "epoch": 0.12091688449375765, "grad_norm": 8.095007728727529, "learning_rate": 9.784884572491798e-06, "loss": 18.2647, "step": 6615 }, { "epoch": 0.12093516369020418, "grad_norm": 6.642636193639059, "learning_rate": 9.784798672195138e-06, "loss": 17.8691, "step": 6616 }, { "epoch": 0.1209534428866507, "grad_norm": 6.723446292301932, "learning_rate": 9.784712755128176e-06, "loss": 17.5251, "step": 6617 }, { "epoch": 0.12097172208309723, "grad_norm": 6.4877476563560075, "learning_rate": 9.78462682129121e-06, "loss": 17.5709, "step": 6618 }, { "epoch": 0.12099000127954375, "grad_norm": 5.652538915549849, "learning_rate": 9.784540870684542e-06, "loss": 17.1876, "step": 6619 }, { "epoch": 0.12100828047599027, "grad_norm": 8.07854844816267, "learning_rate": 9.784454903308475e-06, "loss": 18.2179, "step": 6620 }, { "epoch": 0.1210265596724368, "grad_norm": 7.189175614139578, "learning_rate": 9.784368919163307e-06, "loss": 18.0607, "step": 6621 }, { "epoch": 0.12104483886888333, "grad_norm": 6.85131669886279, "learning_rate": 9.78428291824934e-06, "loss": 17.696, "step": 6622 }, { "epoch": 0.12106311806532985, "grad_norm": 6.980487847130155, "learning_rate": 9.78419690056688e-06, "loss": 17.5203, "step": 6623 }, { "epoch": 0.12108139726177637, "grad_norm": 7.103323988532092, "learning_rate": 9.784110866116223e-06, "loss": 17.7473, "step": 6624 }, { "epoch": 0.1210996764582229, "grad_norm": 7.107746857581005, "learning_rate": 9.784024814897675e-06, "loss": 17.7612, "step": 6625 }, { "epoch": 0.12111795565466942, "grad_norm": 7.899770965041136, "learning_rate": 9.783938746911532e-06, "loss": 18.4489, "step": 6626 }, { "epoch": 0.12113623485111595, "grad_norm": 7.480258079191613, "learning_rate": 9.7838526621581e-06, "loss": 18.023, "step": 6627 }, { "epoch": 0.12115451404756247, "grad_norm": 5.904831813224204, "learning_rate": 9.78376656063768e-06, "loss": 17.1418, "step": 6628 }, { "epoch": 0.121172793244009, "grad_norm": 7.742130858072729, "learning_rate": 9.783680442350571e-06, "loss": 18.3106, "step": 6629 }, { "epoch": 0.12119107244045552, "grad_norm": 6.462180802164989, "learning_rate": 9.78359430729708e-06, "loss": 17.504, "step": 6630 }, { "epoch": 0.12120935163690204, "grad_norm": 5.869227175449395, "learning_rate": 9.783508155477506e-06, "loss": 17.2101, "step": 6631 }, { "epoch": 0.12122763083334856, "grad_norm": 5.5460802093685455, "learning_rate": 9.78342198689215e-06, "loss": 17.1091, "step": 6632 }, { "epoch": 0.1212459100297951, "grad_norm": 7.030778273171266, "learning_rate": 9.783335801541314e-06, "loss": 17.6532, "step": 6633 }, { "epoch": 0.12126418922624162, "grad_norm": 7.3654630835564125, "learning_rate": 9.783249599425302e-06, "loss": 17.6943, "step": 6634 }, { "epoch": 0.12128246842268814, "grad_norm": 8.340096371958918, "learning_rate": 9.783163380544416e-06, "loss": 18.7983, "step": 6635 }, { "epoch": 0.12130074761913466, "grad_norm": 6.741932066440961, "learning_rate": 9.783077144898957e-06, "loss": 17.7768, "step": 6636 }, { "epoch": 0.12131902681558118, "grad_norm": 7.2685583397958835, "learning_rate": 9.782990892489227e-06, "loss": 17.8446, "step": 6637 }, { "epoch": 0.12133730601202772, "grad_norm": 7.525327853935263, "learning_rate": 9.78290462331553e-06, "loss": 18.0467, "step": 6638 }, { "epoch": 0.12135558520847424, "grad_norm": 7.054087612526331, "learning_rate": 9.782818337378166e-06, "loss": 17.8198, "step": 6639 }, { "epoch": 0.12137386440492076, "grad_norm": 6.507013946601593, "learning_rate": 9.78273203467744e-06, "loss": 17.4221, "step": 6640 }, { "epoch": 0.12139214360136728, "grad_norm": 8.665104638381612, "learning_rate": 9.782645715213651e-06, "loss": 18.4877, "step": 6641 }, { "epoch": 0.1214104227978138, "grad_norm": 6.2512124400221944, "learning_rate": 9.782559378987106e-06, "loss": 17.3166, "step": 6642 }, { "epoch": 0.12142870199426033, "grad_norm": 6.566995098817002, "learning_rate": 9.782473025998105e-06, "loss": 17.5743, "step": 6643 }, { "epoch": 0.12144698119070686, "grad_norm": 8.687345618797119, "learning_rate": 9.782386656246951e-06, "loss": 17.5453, "step": 6644 }, { "epoch": 0.12146526038715338, "grad_norm": 6.810078433225566, "learning_rate": 9.782300269733947e-06, "loss": 17.5568, "step": 6645 }, { "epoch": 0.1214835395835999, "grad_norm": 5.867345526931524, "learning_rate": 9.782213866459395e-06, "loss": 17.2831, "step": 6646 }, { "epoch": 0.12150181878004643, "grad_norm": 6.927825558368225, "learning_rate": 9.7821274464236e-06, "loss": 17.8387, "step": 6647 }, { "epoch": 0.12152009797649295, "grad_norm": 8.042051243900016, "learning_rate": 9.78204100962686e-06, "loss": 18.4047, "step": 6648 }, { "epoch": 0.12153837717293947, "grad_norm": 7.815543835435438, "learning_rate": 9.781954556069484e-06, "loss": 18.4583, "step": 6649 }, { "epoch": 0.121556656369386, "grad_norm": 6.365267516889937, "learning_rate": 9.781868085751772e-06, "loss": 17.7982, "step": 6650 }, { "epoch": 0.12157493556583253, "grad_norm": 7.928597987709175, "learning_rate": 9.781781598674027e-06, "loss": 18.4748, "step": 6651 }, { "epoch": 0.12159321476227905, "grad_norm": 6.770908180356978, "learning_rate": 9.781695094836553e-06, "loss": 17.5607, "step": 6652 }, { "epoch": 0.12161149395872557, "grad_norm": 8.076061530040407, "learning_rate": 9.78160857423965e-06, "loss": 18.4446, "step": 6653 }, { "epoch": 0.1216297731551721, "grad_norm": 6.989353573018683, "learning_rate": 9.781522036883626e-06, "loss": 18.0134, "step": 6654 }, { "epoch": 0.12164805235161863, "grad_norm": 7.163769031715184, "learning_rate": 9.781435482768781e-06, "loss": 17.879, "step": 6655 }, { "epoch": 0.12166633154806515, "grad_norm": 5.748367459077812, "learning_rate": 9.78134891189542e-06, "loss": 17.3384, "step": 6656 }, { "epoch": 0.12168461074451167, "grad_norm": 7.788122691341108, "learning_rate": 9.781262324263846e-06, "loss": 17.8478, "step": 6657 }, { "epoch": 0.1217028899409582, "grad_norm": 6.219641421602568, "learning_rate": 9.781175719874364e-06, "loss": 17.3303, "step": 6658 }, { "epoch": 0.12172116913740472, "grad_norm": 6.823288884294493, "learning_rate": 9.781089098727274e-06, "loss": 17.6334, "step": 6659 }, { "epoch": 0.12173944833385124, "grad_norm": 6.968891928695853, "learning_rate": 9.781002460822883e-06, "loss": 17.7669, "step": 6660 }, { "epoch": 0.12175772753029777, "grad_norm": 6.87242956629981, "learning_rate": 9.780915806161493e-06, "loss": 17.5215, "step": 6661 }, { "epoch": 0.1217760067267443, "grad_norm": 6.415485023430618, "learning_rate": 9.780829134743408e-06, "loss": 17.7493, "step": 6662 }, { "epoch": 0.12179428592319082, "grad_norm": 6.771392867085264, "learning_rate": 9.780742446568932e-06, "loss": 17.6961, "step": 6663 }, { "epoch": 0.12181256511963734, "grad_norm": 6.6995121966166185, "learning_rate": 9.780655741638367e-06, "loss": 17.6009, "step": 6664 }, { "epoch": 0.12183084431608386, "grad_norm": 6.648799284857779, "learning_rate": 9.78056901995202e-06, "loss": 17.2554, "step": 6665 }, { "epoch": 0.12184912351253038, "grad_norm": 5.713634795954836, "learning_rate": 9.780482281510194e-06, "loss": 17.3779, "step": 6666 }, { "epoch": 0.12186740270897692, "grad_norm": 7.321783359721326, "learning_rate": 9.780395526313188e-06, "loss": 18.013, "step": 6667 }, { "epoch": 0.12188568190542344, "grad_norm": 6.368595998721913, "learning_rate": 9.780308754361316e-06, "loss": 17.3284, "step": 6668 }, { "epoch": 0.12190396110186996, "grad_norm": 7.940013034290859, "learning_rate": 9.780221965654874e-06, "loss": 18.2431, "step": 6669 }, { "epoch": 0.12192224029831648, "grad_norm": 8.2597910346473, "learning_rate": 9.780135160194168e-06, "loss": 18.3547, "step": 6670 }, { "epoch": 0.121940519494763, "grad_norm": 8.342409103263948, "learning_rate": 9.780048337979505e-06, "loss": 18.4465, "step": 6671 }, { "epoch": 0.12195879869120954, "grad_norm": 7.896434680159246, "learning_rate": 9.779961499011187e-06, "loss": 18.0811, "step": 6672 }, { "epoch": 0.12197707788765606, "grad_norm": 7.1427655665856955, "learning_rate": 9.779874643289517e-06, "loss": 17.8061, "step": 6673 }, { "epoch": 0.12199535708410258, "grad_norm": 7.285996116609734, "learning_rate": 9.779787770814804e-06, "loss": 17.7508, "step": 6674 }, { "epoch": 0.1220136362805491, "grad_norm": 7.961217904499843, "learning_rate": 9.779700881587349e-06, "loss": 17.8702, "step": 6675 }, { "epoch": 0.12203191547699563, "grad_norm": 9.678940575997192, "learning_rate": 9.779613975607456e-06, "loss": 18.5119, "step": 6676 }, { "epoch": 0.12205019467344215, "grad_norm": 7.305182452069914, "learning_rate": 9.779527052875431e-06, "loss": 18.0059, "step": 6677 }, { "epoch": 0.12206847386988869, "grad_norm": 8.011995951602925, "learning_rate": 9.779440113391578e-06, "loss": 18.1858, "step": 6678 }, { "epoch": 0.12208675306633521, "grad_norm": 7.799713488923933, "learning_rate": 9.779353157156202e-06, "loss": 17.9749, "step": 6679 }, { "epoch": 0.12210503226278173, "grad_norm": 7.960979082936661, "learning_rate": 9.77926618416961e-06, "loss": 18.3202, "step": 6680 }, { "epoch": 0.12212331145922825, "grad_norm": 6.171541488608712, "learning_rate": 9.779179194432102e-06, "loss": 17.2526, "step": 6681 }, { "epoch": 0.12214159065567477, "grad_norm": 6.397595410388515, "learning_rate": 9.779092187943988e-06, "loss": 17.7538, "step": 6682 }, { "epoch": 0.1221598698521213, "grad_norm": 7.348032278924981, "learning_rate": 9.779005164705568e-06, "loss": 17.7268, "step": 6683 }, { "epoch": 0.12217814904856783, "grad_norm": 6.751145450140728, "learning_rate": 9.778918124717151e-06, "loss": 17.9829, "step": 6684 }, { "epoch": 0.12219642824501435, "grad_norm": 6.997427910648309, "learning_rate": 9.778831067979043e-06, "loss": 17.7092, "step": 6685 }, { "epoch": 0.12221470744146087, "grad_norm": 6.570589473132914, "learning_rate": 9.778743994491544e-06, "loss": 17.425, "step": 6686 }, { "epoch": 0.1222329866379074, "grad_norm": 8.874818025027167, "learning_rate": 9.778656904254962e-06, "loss": 17.9388, "step": 6687 }, { "epoch": 0.12225126583435392, "grad_norm": 6.758030485303756, "learning_rate": 9.778569797269604e-06, "loss": 17.8268, "step": 6688 }, { "epoch": 0.12226954503080045, "grad_norm": 6.9498680833266455, "learning_rate": 9.778482673535772e-06, "loss": 17.806, "step": 6689 }, { "epoch": 0.12228782422724697, "grad_norm": 7.238401612147517, "learning_rate": 9.778395533053772e-06, "loss": 17.9575, "step": 6690 }, { "epoch": 0.1223061034236935, "grad_norm": 7.79205501513286, "learning_rate": 9.778308375823912e-06, "loss": 18.188, "step": 6691 }, { "epoch": 0.12232438262014002, "grad_norm": 8.232803048564039, "learning_rate": 9.778221201846496e-06, "loss": 17.9286, "step": 6692 }, { "epoch": 0.12234266181658654, "grad_norm": 7.463984094963095, "learning_rate": 9.778134011121829e-06, "loss": 17.9496, "step": 6693 }, { "epoch": 0.12236094101303306, "grad_norm": 6.277175201204437, "learning_rate": 9.778046803650216e-06, "loss": 17.3479, "step": 6694 }, { "epoch": 0.1223792202094796, "grad_norm": 7.628884492213488, "learning_rate": 9.777959579431964e-06, "loss": 17.9509, "step": 6695 }, { "epoch": 0.12239749940592612, "grad_norm": 6.142749128251153, "learning_rate": 9.77787233846738e-06, "loss": 17.1414, "step": 6696 }, { "epoch": 0.12241577860237264, "grad_norm": 7.916450165820055, "learning_rate": 9.777785080756765e-06, "loss": 18.203, "step": 6697 }, { "epoch": 0.12243405779881916, "grad_norm": 7.338705383324853, "learning_rate": 9.77769780630043e-06, "loss": 17.9081, "step": 6698 }, { "epoch": 0.12245233699526568, "grad_norm": 8.047757701247363, "learning_rate": 9.777610515098677e-06, "loss": 18.1902, "step": 6699 }, { "epoch": 0.1224706161917122, "grad_norm": 7.780277635133266, "learning_rate": 9.777523207151816e-06, "loss": 17.9597, "step": 6700 }, { "epoch": 0.12248889538815874, "grad_norm": 5.451371512759478, "learning_rate": 9.777435882460149e-06, "loss": 17.0746, "step": 6701 }, { "epoch": 0.12250717458460526, "grad_norm": 5.919984741863563, "learning_rate": 9.777348541023986e-06, "loss": 17.3077, "step": 6702 }, { "epoch": 0.12252545378105179, "grad_norm": 6.335150978536211, "learning_rate": 9.777261182843627e-06, "loss": 17.4041, "step": 6703 }, { "epoch": 0.12254373297749831, "grad_norm": 7.911778775410028, "learning_rate": 9.777173807919386e-06, "loss": 17.6603, "step": 6704 }, { "epoch": 0.12256201217394483, "grad_norm": 6.039306447646183, "learning_rate": 9.777086416251564e-06, "loss": 17.2483, "step": 6705 }, { "epoch": 0.12258029137039136, "grad_norm": 6.443788852913615, "learning_rate": 9.77699900784047e-06, "loss": 17.5087, "step": 6706 }, { "epoch": 0.12259857056683789, "grad_norm": 7.261665013006005, "learning_rate": 9.776911582686405e-06, "loss": 17.8151, "step": 6707 }, { "epoch": 0.12261684976328441, "grad_norm": 6.636859775767234, "learning_rate": 9.776824140789683e-06, "loss": 17.4764, "step": 6708 }, { "epoch": 0.12263512895973093, "grad_norm": 7.254475992626495, "learning_rate": 9.776736682150606e-06, "loss": 17.8066, "step": 6709 }, { "epoch": 0.12265340815617745, "grad_norm": 7.229741150410805, "learning_rate": 9.77664920676948e-06, "loss": 18.0929, "step": 6710 }, { "epoch": 0.12267168735262397, "grad_norm": 6.5449328205287225, "learning_rate": 9.776561714646616e-06, "loss": 17.3675, "step": 6711 }, { "epoch": 0.12268996654907051, "grad_norm": 6.250228378134764, "learning_rate": 9.776474205782315e-06, "loss": 17.5722, "step": 6712 }, { "epoch": 0.12270824574551703, "grad_norm": 6.9380784567055995, "learning_rate": 9.776386680176888e-06, "loss": 17.8353, "step": 6713 }, { "epoch": 0.12272652494196355, "grad_norm": 5.478467383133045, "learning_rate": 9.776299137830638e-06, "loss": 17.147, "step": 6714 }, { "epoch": 0.12274480413841007, "grad_norm": 8.9457506259024, "learning_rate": 9.776211578743875e-06, "loss": 18.5953, "step": 6715 }, { "epoch": 0.1227630833348566, "grad_norm": 7.88474119114791, "learning_rate": 9.776124002916907e-06, "loss": 18.0582, "step": 6716 }, { "epoch": 0.12278136253130312, "grad_norm": 7.879462358077435, "learning_rate": 9.776036410350035e-06, "loss": 17.8196, "step": 6717 }, { "epoch": 0.12279964172774965, "grad_norm": 7.4188074690427985, "learning_rate": 9.775948801043573e-06, "loss": 17.9607, "step": 6718 }, { "epoch": 0.12281792092419618, "grad_norm": 10.335397547280172, "learning_rate": 9.775861174997824e-06, "loss": 17.7169, "step": 6719 }, { "epoch": 0.1228362001206427, "grad_norm": 6.594194572109577, "learning_rate": 9.775773532213096e-06, "loss": 17.6715, "step": 6720 }, { "epoch": 0.12285447931708922, "grad_norm": 6.208944364137212, "learning_rate": 9.775685872689696e-06, "loss": 17.3092, "step": 6721 }, { "epoch": 0.12287275851353574, "grad_norm": 5.93769402082507, "learning_rate": 9.775598196427931e-06, "loss": 17.423, "step": 6722 }, { "epoch": 0.12289103770998228, "grad_norm": 10.046276137696408, "learning_rate": 9.77551050342811e-06, "loss": 18.779, "step": 6723 }, { "epoch": 0.1229093169064288, "grad_norm": 7.706093370283888, "learning_rate": 9.775422793690539e-06, "loss": 17.8201, "step": 6724 }, { "epoch": 0.12292759610287532, "grad_norm": 6.828056449910487, "learning_rate": 9.775335067215524e-06, "loss": 17.6539, "step": 6725 }, { "epoch": 0.12294587529932184, "grad_norm": 7.260831132405209, "learning_rate": 9.775247324003375e-06, "loss": 17.9743, "step": 6726 }, { "epoch": 0.12296415449576836, "grad_norm": 7.853148072594991, "learning_rate": 9.775159564054398e-06, "loss": 18.0551, "step": 6727 }, { "epoch": 0.12298243369221488, "grad_norm": 6.893374319447, "learning_rate": 9.775071787368902e-06, "loss": 17.907, "step": 6728 }, { "epoch": 0.12300071288866142, "grad_norm": 6.796229898152696, "learning_rate": 9.774983993947194e-06, "loss": 17.591, "step": 6729 }, { "epoch": 0.12301899208510794, "grad_norm": 7.352417629549728, "learning_rate": 9.774896183789579e-06, "loss": 18.0169, "step": 6730 }, { "epoch": 0.12303727128155446, "grad_norm": 7.420712217607259, "learning_rate": 9.77480835689637e-06, "loss": 17.8592, "step": 6731 }, { "epoch": 0.12305555047800099, "grad_norm": 7.79655590706962, "learning_rate": 9.77472051326787e-06, "loss": 18.2393, "step": 6732 }, { "epoch": 0.12307382967444751, "grad_norm": 8.562040061722492, "learning_rate": 9.77463265290439e-06, "loss": 18.5743, "step": 6733 }, { "epoch": 0.12309210887089403, "grad_norm": 7.419169310113422, "learning_rate": 9.774544775806238e-06, "loss": 17.645, "step": 6734 }, { "epoch": 0.12311038806734056, "grad_norm": 7.62493487074596, "learning_rate": 9.774456881973718e-06, "loss": 18.0855, "step": 6735 }, { "epoch": 0.12312866726378709, "grad_norm": 5.854997719179686, "learning_rate": 9.774368971407143e-06, "loss": 17.136, "step": 6736 }, { "epoch": 0.12314694646023361, "grad_norm": 8.432702579713537, "learning_rate": 9.774281044106818e-06, "loss": 18.8493, "step": 6737 }, { "epoch": 0.12316522565668013, "grad_norm": 7.084231465389021, "learning_rate": 9.774193100073054e-06, "loss": 17.6815, "step": 6738 }, { "epoch": 0.12318350485312665, "grad_norm": 7.2206962214256105, "learning_rate": 9.774105139306156e-06, "loss": 17.8568, "step": 6739 }, { "epoch": 0.12320178404957319, "grad_norm": 8.011314791988577, "learning_rate": 9.774017161806434e-06, "loss": 18.0389, "step": 6740 }, { "epoch": 0.12322006324601971, "grad_norm": 6.809095578692524, "learning_rate": 9.773929167574197e-06, "loss": 17.3972, "step": 6741 }, { "epoch": 0.12323834244246623, "grad_norm": 6.631416005685428, "learning_rate": 9.773841156609751e-06, "loss": 17.4618, "step": 6742 }, { "epoch": 0.12325662163891275, "grad_norm": 6.491790080792121, "learning_rate": 9.773753128913406e-06, "loss": 17.4074, "step": 6743 }, { "epoch": 0.12327490083535927, "grad_norm": 7.8694925596265906, "learning_rate": 9.773665084485472e-06, "loss": 18.1842, "step": 6744 }, { "epoch": 0.1232931800318058, "grad_norm": 7.138056421575226, "learning_rate": 9.773577023326255e-06, "loss": 17.8597, "step": 6745 }, { "epoch": 0.12331145922825233, "grad_norm": 7.1014946326973405, "learning_rate": 9.773488945436064e-06, "loss": 17.6909, "step": 6746 }, { "epoch": 0.12332973842469885, "grad_norm": 8.25137421905207, "learning_rate": 9.77340085081521e-06, "loss": 17.7142, "step": 6747 }, { "epoch": 0.12334801762114538, "grad_norm": 7.22933512730961, "learning_rate": 9.773312739464001e-06, "loss": 17.8855, "step": 6748 }, { "epoch": 0.1233662968175919, "grad_norm": 6.406227023502943, "learning_rate": 9.773224611382744e-06, "loss": 17.4731, "step": 6749 }, { "epoch": 0.12338457601403842, "grad_norm": 6.29912276582381, "learning_rate": 9.77313646657175e-06, "loss": 17.3361, "step": 6750 }, { "epoch": 0.12340285521048494, "grad_norm": 7.163796354375712, "learning_rate": 9.773048305031324e-06, "loss": 17.9806, "step": 6751 }, { "epoch": 0.12342113440693148, "grad_norm": 8.083667017627093, "learning_rate": 9.772960126761779e-06, "loss": 17.6488, "step": 6752 }, { "epoch": 0.123439413603378, "grad_norm": 9.335094240895968, "learning_rate": 9.772871931763423e-06, "loss": 18.4421, "step": 6753 }, { "epoch": 0.12345769279982452, "grad_norm": 8.650517180629848, "learning_rate": 9.772783720036566e-06, "loss": 18.2238, "step": 6754 }, { "epoch": 0.12347597199627104, "grad_norm": 7.408655689107285, "learning_rate": 9.772695491581517e-06, "loss": 18.178, "step": 6755 }, { "epoch": 0.12349425119271756, "grad_norm": 6.272185991151557, "learning_rate": 9.772607246398582e-06, "loss": 17.3344, "step": 6756 }, { "epoch": 0.1235125303891641, "grad_norm": 8.625669454668147, "learning_rate": 9.772518984488076e-06, "loss": 18.3302, "step": 6757 }, { "epoch": 0.12353080958561062, "grad_norm": 8.106328974221528, "learning_rate": 9.772430705850302e-06, "loss": 18.0292, "step": 6758 }, { "epoch": 0.12354908878205714, "grad_norm": 7.396995596141605, "learning_rate": 9.772342410485574e-06, "loss": 18.1042, "step": 6759 }, { "epoch": 0.12356736797850366, "grad_norm": 8.424871113247226, "learning_rate": 9.772254098394199e-06, "loss": 18.2745, "step": 6760 }, { "epoch": 0.12358564717495019, "grad_norm": 7.550498496474941, "learning_rate": 9.772165769576487e-06, "loss": 17.8764, "step": 6761 }, { "epoch": 0.12360392637139671, "grad_norm": 6.283517836040569, "learning_rate": 9.77207742403275e-06, "loss": 17.5406, "step": 6762 }, { "epoch": 0.12362220556784324, "grad_norm": 7.32837620388845, "learning_rate": 9.771989061763295e-06, "loss": 17.9969, "step": 6763 }, { "epoch": 0.12364048476428977, "grad_norm": 6.999174504920259, "learning_rate": 9.771900682768431e-06, "loss": 17.6985, "step": 6764 }, { "epoch": 0.12365876396073629, "grad_norm": 7.353166074856369, "learning_rate": 9.771812287048473e-06, "loss": 17.9816, "step": 6765 }, { "epoch": 0.12367704315718281, "grad_norm": 6.872916666751801, "learning_rate": 9.771723874603722e-06, "loss": 17.7053, "step": 6766 }, { "epoch": 0.12369532235362933, "grad_norm": 10.832766572986543, "learning_rate": 9.771635445434497e-06, "loss": 17.7978, "step": 6767 }, { "epoch": 0.12371360155007585, "grad_norm": 6.904832355542965, "learning_rate": 9.771546999541101e-06, "loss": 18.0874, "step": 6768 }, { "epoch": 0.12373188074652239, "grad_norm": 8.029770332971117, "learning_rate": 9.77145853692385e-06, "loss": 17.9579, "step": 6769 }, { "epoch": 0.12375015994296891, "grad_norm": 7.006988683324624, "learning_rate": 9.771370057583047e-06, "loss": 17.7966, "step": 6770 }, { "epoch": 0.12376843913941543, "grad_norm": 7.2888792726150955, "learning_rate": 9.771281561519009e-06, "loss": 17.9035, "step": 6771 }, { "epoch": 0.12378671833586195, "grad_norm": 7.46306368992549, "learning_rate": 9.771193048732041e-06, "loss": 17.8343, "step": 6772 }, { "epoch": 0.12380499753230848, "grad_norm": 6.408100073477963, "learning_rate": 9.771104519222457e-06, "loss": 17.3325, "step": 6773 }, { "epoch": 0.12382327672875501, "grad_norm": 8.208083765211683, "learning_rate": 9.771015972990564e-06, "loss": 18.5437, "step": 6774 }, { "epoch": 0.12384155592520153, "grad_norm": 6.773044224298968, "learning_rate": 9.770927410036677e-06, "loss": 17.7102, "step": 6775 }, { "epoch": 0.12385983512164805, "grad_norm": 7.451165839563993, "learning_rate": 9.770838830361101e-06, "loss": 17.8902, "step": 6776 }, { "epoch": 0.12387811431809458, "grad_norm": 7.838133310158093, "learning_rate": 9.77075023396415e-06, "loss": 17.9973, "step": 6777 }, { "epoch": 0.1238963935145411, "grad_norm": 7.657081364273012, "learning_rate": 9.770661620846132e-06, "loss": 17.6833, "step": 6778 }, { "epoch": 0.12391467271098762, "grad_norm": 6.482389867965016, "learning_rate": 9.770572991007362e-06, "loss": 17.4262, "step": 6779 }, { "epoch": 0.12393295190743416, "grad_norm": 6.927975602071881, "learning_rate": 9.770484344448144e-06, "loss": 17.9453, "step": 6780 }, { "epoch": 0.12395123110388068, "grad_norm": 7.236795933527305, "learning_rate": 9.770395681168794e-06, "loss": 17.639, "step": 6781 }, { "epoch": 0.1239695103003272, "grad_norm": 6.495468232258661, "learning_rate": 9.770307001169621e-06, "loss": 17.3891, "step": 6782 }, { "epoch": 0.12398778949677372, "grad_norm": 8.383316197209089, "learning_rate": 9.770218304450935e-06, "loss": 18.4597, "step": 6783 }, { "epoch": 0.12400606869322024, "grad_norm": 5.72011174779133, "learning_rate": 9.770129591013049e-06, "loss": 17.4688, "step": 6784 }, { "epoch": 0.12402434788966676, "grad_norm": 6.9603655146625325, "learning_rate": 9.770040860856273e-06, "loss": 17.9703, "step": 6785 }, { "epoch": 0.1240426270861133, "grad_norm": 8.70264578219701, "learning_rate": 9.769952113980917e-06, "loss": 18.574, "step": 6786 }, { "epoch": 0.12406090628255982, "grad_norm": 7.579062368171308, "learning_rate": 9.769863350387293e-06, "loss": 18.1362, "step": 6787 }, { "epoch": 0.12407918547900634, "grad_norm": 6.936197691883063, "learning_rate": 9.769774570075711e-06, "loss": 17.8093, "step": 6788 }, { "epoch": 0.12409746467545286, "grad_norm": 7.832351463230458, "learning_rate": 9.769685773046484e-06, "loss": 18.2735, "step": 6789 }, { "epoch": 0.12411574387189939, "grad_norm": 8.877510471961573, "learning_rate": 9.769596959299923e-06, "loss": 18.4219, "step": 6790 }, { "epoch": 0.12413402306834592, "grad_norm": 7.385366838914088, "learning_rate": 9.769508128836338e-06, "loss": 17.6268, "step": 6791 }, { "epoch": 0.12415230226479244, "grad_norm": 8.63879098043357, "learning_rate": 9.76941928165604e-06, "loss": 18.8794, "step": 6792 }, { "epoch": 0.12417058146123897, "grad_norm": 7.084520759446865, "learning_rate": 9.769330417759342e-06, "loss": 17.846, "step": 6793 }, { "epoch": 0.12418886065768549, "grad_norm": 6.977025543661845, "learning_rate": 9.769241537146555e-06, "loss": 17.5868, "step": 6794 }, { "epoch": 0.12420713985413201, "grad_norm": 8.314269462705171, "learning_rate": 9.769152639817988e-06, "loss": 18.8014, "step": 6795 }, { "epoch": 0.12422541905057853, "grad_norm": 7.26374498994195, "learning_rate": 9.769063725773957e-06, "loss": 18.0784, "step": 6796 }, { "epoch": 0.12424369824702507, "grad_norm": 7.750043978676372, "learning_rate": 9.768974795014772e-06, "loss": 17.9159, "step": 6797 }, { "epoch": 0.12426197744347159, "grad_norm": 7.184436727363733, "learning_rate": 9.768885847540743e-06, "loss": 17.7996, "step": 6798 }, { "epoch": 0.12428025663991811, "grad_norm": 6.598999271682578, "learning_rate": 9.768796883352183e-06, "loss": 17.9671, "step": 6799 }, { "epoch": 0.12429853583636463, "grad_norm": 6.794746580189616, "learning_rate": 9.768707902449403e-06, "loss": 17.5868, "step": 6800 }, { "epoch": 0.12431681503281115, "grad_norm": 8.294978666463557, "learning_rate": 9.768618904832718e-06, "loss": 17.8646, "step": 6801 }, { "epoch": 0.12433509422925768, "grad_norm": 6.481829932916731, "learning_rate": 9.768529890502435e-06, "loss": 17.5915, "step": 6802 }, { "epoch": 0.12435337342570421, "grad_norm": 8.799674341553661, "learning_rate": 9.76844085945887e-06, "loss": 17.8731, "step": 6803 }, { "epoch": 0.12437165262215073, "grad_norm": 6.441341548209323, "learning_rate": 9.768351811702333e-06, "loss": 17.5145, "step": 6804 }, { "epoch": 0.12438993181859725, "grad_norm": 7.724334044183744, "learning_rate": 9.768262747233137e-06, "loss": 17.9513, "step": 6805 }, { "epoch": 0.12440821101504378, "grad_norm": 8.228683522833085, "learning_rate": 9.768173666051594e-06, "loss": 17.9028, "step": 6806 }, { "epoch": 0.1244264902114903, "grad_norm": 7.413906193257426, "learning_rate": 9.768084568158015e-06, "loss": 17.8082, "step": 6807 }, { "epoch": 0.12444476940793683, "grad_norm": 8.965826870301163, "learning_rate": 9.767995453552714e-06, "loss": 18.545, "step": 6808 }, { "epoch": 0.12446304860438336, "grad_norm": 7.178717823470608, "learning_rate": 9.767906322236002e-06, "loss": 17.8343, "step": 6809 }, { "epoch": 0.12448132780082988, "grad_norm": 8.822238423199295, "learning_rate": 9.767817174208194e-06, "loss": 18.6875, "step": 6810 }, { "epoch": 0.1244996069972764, "grad_norm": 6.395336496052081, "learning_rate": 9.7677280094696e-06, "loss": 17.4369, "step": 6811 }, { "epoch": 0.12451788619372292, "grad_norm": 6.550944616976022, "learning_rate": 9.767638828020532e-06, "loss": 17.5714, "step": 6812 }, { "epoch": 0.12453616539016944, "grad_norm": 7.3943820592035525, "learning_rate": 9.767549629861304e-06, "loss": 17.8888, "step": 6813 }, { "epoch": 0.12455444458661598, "grad_norm": 6.955204796907054, "learning_rate": 9.767460414992229e-06, "loss": 17.688, "step": 6814 }, { "epoch": 0.1245727237830625, "grad_norm": 9.371640516387902, "learning_rate": 9.767371183413619e-06, "loss": 17.7048, "step": 6815 }, { "epoch": 0.12459100297950902, "grad_norm": 6.945829805828666, "learning_rate": 9.767281935125785e-06, "loss": 17.9757, "step": 6816 }, { "epoch": 0.12460928217595554, "grad_norm": 8.224404276896458, "learning_rate": 9.767192670129042e-06, "loss": 18.285, "step": 6817 }, { "epoch": 0.12462756137240207, "grad_norm": 6.9793598341825165, "learning_rate": 9.767103388423704e-06, "loss": 17.9039, "step": 6818 }, { "epoch": 0.12464584056884859, "grad_norm": 6.452361576578343, "learning_rate": 9.767014090010081e-06, "loss": 17.5745, "step": 6819 }, { "epoch": 0.12466411976529512, "grad_norm": 7.7401983160125445, "learning_rate": 9.766924774888487e-06, "loss": 17.6759, "step": 6820 }, { "epoch": 0.12468239896174164, "grad_norm": 6.461913283266975, "learning_rate": 9.766835443059235e-06, "loss": 17.2202, "step": 6821 }, { "epoch": 0.12470067815818817, "grad_norm": 6.408831285939423, "learning_rate": 9.76674609452264e-06, "loss": 17.4377, "step": 6822 }, { "epoch": 0.12471895735463469, "grad_norm": 6.4698280102972285, "learning_rate": 9.766656729279012e-06, "loss": 17.3235, "step": 6823 }, { "epoch": 0.12473723655108121, "grad_norm": 7.456689630962659, "learning_rate": 9.766567347328667e-06, "loss": 18.078, "step": 6824 }, { "epoch": 0.12475551574752775, "grad_norm": 6.2871557378464615, "learning_rate": 9.766477948671918e-06, "loss": 17.3276, "step": 6825 }, { "epoch": 0.12477379494397427, "grad_norm": 6.4628018911268805, "learning_rate": 9.766388533309075e-06, "loss": 17.4461, "step": 6826 }, { "epoch": 0.12479207414042079, "grad_norm": 8.798310200953944, "learning_rate": 9.766299101240455e-06, "loss": 18.3388, "step": 6827 }, { "epoch": 0.12481035333686731, "grad_norm": 9.01801958880886, "learning_rate": 9.76620965246637e-06, "loss": 18.0676, "step": 6828 }, { "epoch": 0.12482863253331383, "grad_norm": 7.637732117289141, "learning_rate": 9.766120186987134e-06, "loss": 17.857, "step": 6829 }, { "epoch": 0.12484691172976035, "grad_norm": 7.3498358064409155, "learning_rate": 9.76603070480306e-06, "loss": 17.7859, "step": 6830 }, { "epoch": 0.12486519092620689, "grad_norm": 7.96401297844795, "learning_rate": 9.765941205914461e-06, "loss": 17.8464, "step": 6831 }, { "epoch": 0.12488347012265341, "grad_norm": 7.491558225034503, "learning_rate": 9.765851690321652e-06, "loss": 18.0553, "step": 6832 }, { "epoch": 0.12490174931909993, "grad_norm": 6.684682411726688, "learning_rate": 9.765762158024948e-06, "loss": 17.9563, "step": 6833 }, { "epoch": 0.12492002851554646, "grad_norm": 7.678492114914815, "learning_rate": 9.765672609024662e-06, "loss": 18.0928, "step": 6834 }, { "epoch": 0.12493830771199298, "grad_norm": 6.75221266810973, "learning_rate": 9.765583043321104e-06, "loss": 17.642, "step": 6835 }, { "epoch": 0.1249565869084395, "grad_norm": 5.652893022972405, "learning_rate": 9.765493460914592e-06, "loss": 17.2443, "step": 6836 }, { "epoch": 0.12497486610488603, "grad_norm": 7.211616485499025, "learning_rate": 9.76540386180544e-06, "loss": 17.5644, "step": 6837 }, { "epoch": 0.12499314530133256, "grad_norm": 7.491578036573935, "learning_rate": 9.76531424599396e-06, "loss": 18.2474, "step": 6838 }, { "epoch": 0.12501142449777908, "grad_norm": 8.106648613675649, "learning_rate": 9.765224613480468e-06, "loss": 18.2582, "step": 6839 }, { "epoch": 0.1250297036942256, "grad_norm": 7.301726902083591, "learning_rate": 9.765134964265277e-06, "loss": 17.7908, "step": 6840 }, { "epoch": 0.12504798289067212, "grad_norm": 5.857463158845806, "learning_rate": 9.765045298348701e-06, "loss": 17.3778, "step": 6841 }, { "epoch": 0.12506626208711866, "grad_norm": 6.6013829363255, "learning_rate": 9.764955615731054e-06, "loss": 17.5809, "step": 6842 }, { "epoch": 0.12508454128356516, "grad_norm": 7.349650909699515, "learning_rate": 9.764865916412651e-06, "loss": 18.0713, "step": 6843 }, { "epoch": 0.1251028204800117, "grad_norm": 7.1705446033353555, "learning_rate": 9.764776200393809e-06, "loss": 17.7023, "step": 6844 }, { "epoch": 0.12512109967645824, "grad_norm": 9.594565997134804, "learning_rate": 9.764686467674837e-06, "loss": 18.6706, "step": 6845 }, { "epoch": 0.12513937887290474, "grad_norm": 6.26969894834299, "learning_rate": 9.764596718256054e-06, "loss": 17.4378, "step": 6846 }, { "epoch": 0.12515765806935128, "grad_norm": 6.419926332413454, "learning_rate": 9.764506952137772e-06, "loss": 17.5315, "step": 6847 }, { "epoch": 0.1251759372657978, "grad_norm": 7.022424959197518, "learning_rate": 9.764417169320308e-06, "loss": 17.9142, "step": 6848 }, { "epoch": 0.12519421646224432, "grad_norm": 8.789123413034922, "learning_rate": 9.764327369803974e-06, "loss": 18.9441, "step": 6849 }, { "epoch": 0.12521249565869083, "grad_norm": 8.052600772208601, "learning_rate": 9.764237553589086e-06, "loss": 18.049, "step": 6850 }, { "epoch": 0.12523077485513737, "grad_norm": 7.815449913994883, "learning_rate": 9.764147720675959e-06, "loss": 17.9423, "step": 6851 }, { "epoch": 0.1252490540515839, "grad_norm": 7.716998644654304, "learning_rate": 9.764057871064908e-06, "loss": 18.2239, "step": 6852 }, { "epoch": 0.1252673332480304, "grad_norm": 7.985093938302353, "learning_rate": 9.763968004756248e-06, "loss": 17.9283, "step": 6853 }, { "epoch": 0.12528561244447695, "grad_norm": 5.863768231151736, "learning_rate": 9.763878121750293e-06, "loss": 17.1499, "step": 6854 }, { "epoch": 0.12530389164092345, "grad_norm": 7.512199977568955, "learning_rate": 9.763788222047358e-06, "loss": 17.7701, "step": 6855 }, { "epoch": 0.12532217083737, "grad_norm": 7.015726812898855, "learning_rate": 9.76369830564776e-06, "loss": 17.7275, "step": 6856 }, { "epoch": 0.12534045003381653, "grad_norm": 8.576731202571079, "learning_rate": 9.763608372551812e-06, "loss": 18.4529, "step": 6857 }, { "epoch": 0.12535872923026303, "grad_norm": 9.091430597613348, "learning_rate": 9.763518422759829e-06, "loss": 18.7546, "step": 6858 }, { "epoch": 0.12537700842670957, "grad_norm": 6.60377192208082, "learning_rate": 9.763428456272127e-06, "loss": 17.4729, "step": 6859 }, { "epoch": 0.12539528762315608, "grad_norm": 6.8105219824174545, "learning_rate": 9.763338473089023e-06, "loss": 17.6938, "step": 6860 }, { "epoch": 0.1254135668196026, "grad_norm": 6.301755296093012, "learning_rate": 9.76324847321083e-06, "loss": 17.3673, "step": 6861 }, { "epoch": 0.12543184601604915, "grad_norm": 9.262335557793726, "learning_rate": 9.763158456637868e-06, "loss": 18.244, "step": 6862 }, { "epoch": 0.12545012521249566, "grad_norm": 6.558293955485627, "learning_rate": 9.763068423370446e-06, "loss": 17.7602, "step": 6863 }, { "epoch": 0.1254684044089422, "grad_norm": 6.851336277162645, "learning_rate": 9.762978373408882e-06, "loss": 17.8758, "step": 6864 }, { "epoch": 0.1254866836053887, "grad_norm": 6.178099064026581, "learning_rate": 9.762888306753493e-06, "loss": 17.2753, "step": 6865 }, { "epoch": 0.12550496280183523, "grad_norm": 7.971854847632996, "learning_rate": 9.762798223404595e-06, "loss": 18.2635, "step": 6866 }, { "epoch": 0.12552324199828174, "grad_norm": 7.461915683024421, "learning_rate": 9.7627081233625e-06, "loss": 17.9847, "step": 6867 }, { "epoch": 0.12554152119472828, "grad_norm": 6.167959628386896, "learning_rate": 9.762618006627526e-06, "loss": 17.1975, "step": 6868 }, { "epoch": 0.12555980039117481, "grad_norm": 5.904957131953893, "learning_rate": 9.762527873199991e-06, "loss": 17.3502, "step": 6869 }, { "epoch": 0.12557807958762132, "grad_norm": 7.319643468981985, "learning_rate": 9.762437723080209e-06, "loss": 17.8596, "step": 6870 }, { "epoch": 0.12559635878406786, "grad_norm": 6.776461760249466, "learning_rate": 9.762347556268497e-06, "loss": 17.5943, "step": 6871 }, { "epoch": 0.12561463798051437, "grad_norm": 6.945555187723439, "learning_rate": 9.762257372765169e-06, "loss": 17.6469, "step": 6872 }, { "epoch": 0.1256329171769609, "grad_norm": 7.5795008587308175, "learning_rate": 9.762167172570541e-06, "loss": 18.1053, "step": 6873 }, { "epoch": 0.12565119637340744, "grad_norm": 7.625446892507235, "learning_rate": 9.762076955684932e-06, "loss": 18.0321, "step": 6874 }, { "epoch": 0.12566947556985394, "grad_norm": 7.550851598821339, "learning_rate": 9.761986722108656e-06, "loss": 18.0622, "step": 6875 }, { "epoch": 0.12568775476630048, "grad_norm": 5.343112907295592, "learning_rate": 9.761896471842029e-06, "loss": 17.2661, "step": 6876 }, { "epoch": 0.125706033962747, "grad_norm": 6.896208545413777, "learning_rate": 9.76180620488537e-06, "loss": 17.6864, "step": 6877 }, { "epoch": 0.12572431315919352, "grad_norm": 7.185623538717204, "learning_rate": 9.76171592123899e-06, "loss": 17.2856, "step": 6878 }, { "epoch": 0.12574259235564006, "grad_norm": 7.686167353778615, "learning_rate": 9.761625620903212e-06, "loss": 18.3195, "step": 6879 }, { "epoch": 0.12576087155208657, "grad_norm": 8.296321085350883, "learning_rate": 9.761535303878349e-06, "loss": 18.4403, "step": 6880 }, { "epoch": 0.1257791507485331, "grad_norm": 6.768592561279704, "learning_rate": 9.761444970164717e-06, "loss": 17.6828, "step": 6881 }, { "epoch": 0.1257974299449796, "grad_norm": 7.435027612748613, "learning_rate": 9.761354619762634e-06, "loss": 17.8256, "step": 6882 }, { "epoch": 0.12581570914142615, "grad_norm": 7.849360277751968, "learning_rate": 9.761264252672416e-06, "loss": 18.2053, "step": 6883 }, { "epoch": 0.12583398833787265, "grad_norm": 6.429397492132217, "learning_rate": 9.76117386889438e-06, "loss": 17.4916, "step": 6884 }, { "epoch": 0.1258522675343192, "grad_norm": 6.067239574248806, "learning_rate": 9.761083468428843e-06, "loss": 17.3733, "step": 6885 }, { "epoch": 0.12587054673076573, "grad_norm": 8.315848729518695, "learning_rate": 9.760993051276121e-06, "loss": 18.2736, "step": 6886 }, { "epoch": 0.12588882592721223, "grad_norm": 7.32837061349687, "learning_rate": 9.760902617436532e-06, "loss": 17.9103, "step": 6887 }, { "epoch": 0.12590710512365877, "grad_norm": 7.383107886144405, "learning_rate": 9.760812166910391e-06, "loss": 17.6546, "step": 6888 }, { "epoch": 0.12592538432010528, "grad_norm": 7.476709793828603, "learning_rate": 9.760721699698019e-06, "loss": 18.0114, "step": 6889 }, { "epoch": 0.1259436635165518, "grad_norm": 9.167709907246126, "learning_rate": 9.76063121579973e-06, "loss": 18.7377, "step": 6890 }, { "epoch": 0.12596194271299835, "grad_norm": 7.271487503461663, "learning_rate": 9.76054071521584e-06, "loss": 17.9853, "step": 6891 }, { "epoch": 0.12598022190944486, "grad_norm": 6.085035177977042, "learning_rate": 9.760450197946669e-06, "loss": 17.4638, "step": 6892 }, { "epoch": 0.1259985011058914, "grad_norm": 6.552638913672762, "learning_rate": 9.760359663992534e-06, "loss": 17.5143, "step": 6893 }, { "epoch": 0.1260167803023379, "grad_norm": 7.28866953297157, "learning_rate": 9.760269113353751e-06, "loss": 17.7996, "step": 6894 }, { "epoch": 0.12603505949878444, "grad_norm": 7.506247584791319, "learning_rate": 9.760178546030638e-06, "loss": 18.177, "step": 6895 }, { "epoch": 0.12605333869523097, "grad_norm": 7.568777077230304, "learning_rate": 9.76008796202351e-06, "loss": 17.8969, "step": 6896 }, { "epoch": 0.12607161789167748, "grad_norm": 6.8513200679177775, "learning_rate": 9.75999736133269e-06, "loss": 17.728, "step": 6897 }, { "epoch": 0.12608989708812401, "grad_norm": 6.321393301559122, "learning_rate": 9.75990674395849e-06, "loss": 17.4091, "step": 6898 }, { "epoch": 0.12610817628457052, "grad_norm": 5.671400566827995, "learning_rate": 9.75981610990123e-06, "loss": 17.1359, "step": 6899 }, { "epoch": 0.12612645548101706, "grad_norm": 7.333047507187936, "learning_rate": 9.759725459161229e-06, "loss": 17.8416, "step": 6900 }, { "epoch": 0.12614473467746357, "grad_norm": 7.014759835628037, "learning_rate": 9.759634791738803e-06, "loss": 17.8615, "step": 6901 }, { "epoch": 0.1261630138739101, "grad_norm": 7.572353559355617, "learning_rate": 9.75954410763427e-06, "loss": 18.027, "step": 6902 }, { "epoch": 0.12618129307035664, "grad_norm": 7.869481255483959, "learning_rate": 9.759453406847948e-06, "loss": 18.0191, "step": 6903 }, { "epoch": 0.12619957226680314, "grad_norm": 6.993283310444016, "learning_rate": 9.759362689380154e-06, "loss": 17.9355, "step": 6904 }, { "epoch": 0.12621785146324968, "grad_norm": 7.073599533523402, "learning_rate": 9.759271955231207e-06, "loss": 17.9398, "step": 6905 }, { "epoch": 0.1262361306596962, "grad_norm": 6.7543327331622915, "learning_rate": 9.759181204401425e-06, "loss": 17.6489, "step": 6906 }, { "epoch": 0.12625440985614272, "grad_norm": 7.396854189496331, "learning_rate": 9.759090436891126e-06, "loss": 18.0247, "step": 6907 }, { "epoch": 0.12627268905258926, "grad_norm": 7.473337310410864, "learning_rate": 9.758999652700628e-06, "loss": 17.9618, "step": 6908 }, { "epoch": 0.12629096824903577, "grad_norm": 6.54808082938672, "learning_rate": 9.758908851830248e-06, "loss": 17.6502, "step": 6909 }, { "epoch": 0.1263092474454823, "grad_norm": 7.962637624156711, "learning_rate": 9.758818034280306e-06, "loss": 18.1697, "step": 6910 }, { "epoch": 0.1263275266419288, "grad_norm": 8.316194122517786, "learning_rate": 9.75872720005112e-06, "loss": 18.2445, "step": 6911 }, { "epoch": 0.12634580583837535, "grad_norm": 9.076794257119204, "learning_rate": 9.758636349143008e-06, "loss": 18.2431, "step": 6912 }, { "epoch": 0.12636408503482188, "grad_norm": 8.080425757154485, "learning_rate": 9.758545481556289e-06, "loss": 17.9978, "step": 6913 }, { "epoch": 0.1263823642312684, "grad_norm": 7.6418440516441475, "learning_rate": 9.758454597291282e-06, "loss": 17.9627, "step": 6914 }, { "epoch": 0.12640064342771493, "grad_norm": 6.644158785752914, "learning_rate": 9.758363696348303e-06, "loss": 17.3803, "step": 6915 }, { "epoch": 0.12641892262416143, "grad_norm": 6.916556386304829, "learning_rate": 9.758272778727673e-06, "loss": 17.8038, "step": 6916 }, { "epoch": 0.12643720182060797, "grad_norm": 6.866739546264169, "learning_rate": 9.758181844429709e-06, "loss": 17.7915, "step": 6917 }, { "epoch": 0.12645548101705448, "grad_norm": 6.518695509133655, "learning_rate": 9.75809089345473e-06, "loss": 17.4465, "step": 6918 }, { "epoch": 0.126473760213501, "grad_norm": 7.140291668607941, "learning_rate": 9.757999925803057e-06, "loss": 17.9112, "step": 6919 }, { "epoch": 0.12649203940994755, "grad_norm": 7.05297639968776, "learning_rate": 9.757908941475005e-06, "loss": 17.7311, "step": 6920 }, { "epoch": 0.12651031860639406, "grad_norm": 8.412370846508411, "learning_rate": 9.757817940470898e-06, "loss": 18.1769, "step": 6921 }, { "epoch": 0.1265285978028406, "grad_norm": 7.493224194573999, "learning_rate": 9.75772692279105e-06, "loss": 17.8737, "step": 6922 }, { "epoch": 0.1265468769992871, "grad_norm": 7.492658416453076, "learning_rate": 9.75763588843578e-06, "loss": 17.6434, "step": 6923 }, { "epoch": 0.12656515619573364, "grad_norm": 8.217101745761529, "learning_rate": 9.757544837405413e-06, "loss": 17.9622, "step": 6924 }, { "epoch": 0.12658343539218017, "grad_norm": 6.094890483992856, "learning_rate": 9.757453769700263e-06, "loss": 17.5712, "step": 6925 }, { "epoch": 0.12660171458862668, "grad_norm": 8.286935384573368, "learning_rate": 9.757362685320651e-06, "loss": 17.9444, "step": 6926 }, { "epoch": 0.12661999378507321, "grad_norm": 7.465689725152292, "learning_rate": 9.757271584266894e-06, "loss": 17.8804, "step": 6927 }, { "epoch": 0.12663827298151972, "grad_norm": 6.505481223851254, "learning_rate": 9.757180466539314e-06, "loss": 17.4906, "step": 6928 }, { "epoch": 0.12665655217796626, "grad_norm": 7.675165837939194, "learning_rate": 9.757089332138227e-06, "loss": 18.3166, "step": 6929 }, { "epoch": 0.1266748313744128, "grad_norm": 8.04806473890603, "learning_rate": 9.756998181063956e-06, "loss": 17.9215, "step": 6930 }, { "epoch": 0.1266931105708593, "grad_norm": 7.944870275372156, "learning_rate": 9.75690701331682e-06, "loss": 18.3751, "step": 6931 }, { "epoch": 0.12671138976730584, "grad_norm": 8.007043568618746, "learning_rate": 9.756815828897139e-06, "loss": 18.252, "step": 6932 }, { "epoch": 0.12672966896375235, "grad_norm": 6.366963345032425, "learning_rate": 9.756724627805228e-06, "loss": 17.3992, "step": 6933 }, { "epoch": 0.12674794816019888, "grad_norm": 6.433359645818439, "learning_rate": 9.756633410041412e-06, "loss": 17.5832, "step": 6934 }, { "epoch": 0.1267662273566454, "grad_norm": 8.018598139254433, "learning_rate": 9.756542175606009e-06, "loss": 18.1824, "step": 6935 }, { "epoch": 0.12678450655309192, "grad_norm": 6.591706798911266, "learning_rate": 9.756450924499337e-06, "loss": 17.482, "step": 6936 }, { "epoch": 0.12680278574953846, "grad_norm": 6.670475773897604, "learning_rate": 9.756359656721718e-06, "loss": 17.4916, "step": 6937 }, { "epoch": 0.12682106494598497, "grad_norm": 7.389590100695097, "learning_rate": 9.756268372273471e-06, "loss": 18.1617, "step": 6938 }, { "epoch": 0.1268393441424315, "grad_norm": 5.68649928573887, "learning_rate": 9.756177071154917e-06, "loss": 17.4034, "step": 6939 }, { "epoch": 0.126857623338878, "grad_norm": 7.291106003259725, "learning_rate": 9.756085753366374e-06, "loss": 17.7263, "step": 6940 }, { "epoch": 0.12687590253532455, "grad_norm": 7.8527776067626345, "learning_rate": 9.755994418908163e-06, "loss": 18.3024, "step": 6941 }, { "epoch": 0.12689418173177108, "grad_norm": 8.270895321656669, "learning_rate": 9.755903067780604e-06, "loss": 18.3105, "step": 6942 }, { "epoch": 0.1269124609282176, "grad_norm": 6.777498831085499, "learning_rate": 9.755811699984019e-06, "loss": 17.7027, "step": 6943 }, { "epoch": 0.12693074012466413, "grad_norm": 8.05246083098168, "learning_rate": 9.755720315518724e-06, "loss": 18.021, "step": 6944 }, { "epoch": 0.12694901932111063, "grad_norm": 7.863604419253536, "learning_rate": 9.755628914385045e-06, "loss": 17.9604, "step": 6945 }, { "epoch": 0.12696729851755717, "grad_norm": 7.362790015904113, "learning_rate": 9.755537496583299e-06, "loss": 17.7251, "step": 6946 }, { "epoch": 0.1269855777140037, "grad_norm": 6.790814436100857, "learning_rate": 9.755446062113804e-06, "loss": 17.7339, "step": 6947 }, { "epoch": 0.1270038569104502, "grad_norm": 6.046904756191634, "learning_rate": 9.755354610976887e-06, "loss": 17.222, "step": 6948 }, { "epoch": 0.12702213610689675, "grad_norm": 7.016191801706925, "learning_rate": 9.755263143172861e-06, "loss": 17.7207, "step": 6949 }, { "epoch": 0.12704041530334326, "grad_norm": 6.905456591821923, "learning_rate": 9.755171658702053e-06, "loss": 17.7975, "step": 6950 }, { "epoch": 0.1270586944997898, "grad_norm": 6.787073661730627, "learning_rate": 9.75508015756478e-06, "loss": 17.6786, "step": 6951 }, { "epoch": 0.1270769736962363, "grad_norm": 7.194243921241274, "learning_rate": 9.754988639761364e-06, "loss": 17.9999, "step": 6952 }, { "epoch": 0.12709525289268284, "grad_norm": 7.201920287961771, "learning_rate": 9.754897105292125e-06, "loss": 18.3577, "step": 6953 }, { "epoch": 0.12711353208912937, "grad_norm": 6.270448311111634, "learning_rate": 9.754805554157384e-06, "loss": 17.4376, "step": 6954 }, { "epoch": 0.12713181128557588, "grad_norm": 7.552657929665543, "learning_rate": 9.754713986357462e-06, "loss": 17.8489, "step": 6955 }, { "epoch": 0.12715009048202242, "grad_norm": 7.505230455309283, "learning_rate": 9.754622401892681e-06, "loss": 18.0299, "step": 6956 }, { "epoch": 0.12716836967846892, "grad_norm": 8.194620048769831, "learning_rate": 9.75453080076336e-06, "loss": 18.0597, "step": 6957 }, { "epoch": 0.12718664887491546, "grad_norm": 6.852443864005118, "learning_rate": 9.754439182969822e-06, "loss": 17.8304, "step": 6958 }, { "epoch": 0.127204928071362, "grad_norm": 8.07085489741905, "learning_rate": 9.754347548512388e-06, "loss": 18.2323, "step": 6959 }, { "epoch": 0.1272232072678085, "grad_norm": 7.321412998953154, "learning_rate": 9.754255897391378e-06, "loss": 17.8115, "step": 6960 }, { "epoch": 0.12724148646425504, "grad_norm": 6.655116694758986, "learning_rate": 9.754164229607112e-06, "loss": 17.566, "step": 6961 }, { "epoch": 0.12725976566070155, "grad_norm": 7.635913731930229, "learning_rate": 9.754072545159914e-06, "loss": 17.9166, "step": 6962 }, { "epoch": 0.12727804485714808, "grad_norm": 7.4899502176585635, "learning_rate": 9.753980844050104e-06, "loss": 18.1651, "step": 6963 }, { "epoch": 0.12729632405359462, "grad_norm": 7.474395602806392, "learning_rate": 9.753889126278004e-06, "loss": 18.2668, "step": 6964 }, { "epoch": 0.12731460325004113, "grad_norm": 6.253527536548421, "learning_rate": 9.753797391843936e-06, "loss": 17.3812, "step": 6965 }, { "epoch": 0.12733288244648766, "grad_norm": 6.303688954789781, "learning_rate": 9.753705640748219e-06, "loss": 17.4008, "step": 6966 }, { "epoch": 0.12735116164293417, "grad_norm": 7.603106855419932, "learning_rate": 9.753613872991176e-06, "loss": 18.1482, "step": 6967 }, { "epoch": 0.1273694408393807, "grad_norm": 6.2429332485433395, "learning_rate": 9.75352208857313e-06, "loss": 17.5214, "step": 6968 }, { "epoch": 0.1273877200358272, "grad_norm": 8.375732323577289, "learning_rate": 9.753430287494403e-06, "loss": 17.8937, "step": 6969 }, { "epoch": 0.12740599923227375, "grad_norm": 7.625430775782968, "learning_rate": 9.753338469755314e-06, "loss": 18.2324, "step": 6970 }, { "epoch": 0.12742427842872028, "grad_norm": 8.041214519123109, "learning_rate": 9.753246635356186e-06, "loss": 18.3126, "step": 6971 }, { "epoch": 0.1274425576251668, "grad_norm": 6.434700696600225, "learning_rate": 9.753154784297341e-06, "loss": 17.7066, "step": 6972 }, { "epoch": 0.12746083682161333, "grad_norm": 7.253644918125441, "learning_rate": 9.7530629165791e-06, "loss": 17.7911, "step": 6973 }, { "epoch": 0.12747911601805983, "grad_norm": 7.015871110635574, "learning_rate": 9.752971032201787e-06, "loss": 17.5315, "step": 6974 }, { "epoch": 0.12749739521450637, "grad_norm": 6.720732521649265, "learning_rate": 9.752879131165722e-06, "loss": 17.8203, "step": 6975 }, { "epoch": 0.1275156744109529, "grad_norm": 7.3385673433295535, "learning_rate": 9.752787213471229e-06, "loss": 17.9138, "step": 6976 }, { "epoch": 0.12753395360739941, "grad_norm": 8.646071528793843, "learning_rate": 9.752695279118629e-06, "loss": 18.4491, "step": 6977 }, { "epoch": 0.12755223280384595, "grad_norm": 7.223292956533253, "learning_rate": 9.752603328108245e-06, "loss": 18.0877, "step": 6978 }, { "epoch": 0.12757051200029246, "grad_norm": 10.667741010269506, "learning_rate": 9.752511360440397e-06, "loss": 18.8063, "step": 6979 }, { "epoch": 0.127588791196739, "grad_norm": 6.884591377310287, "learning_rate": 9.752419376115412e-06, "loss": 17.8854, "step": 6980 }, { "epoch": 0.12760707039318553, "grad_norm": 6.616257206443414, "learning_rate": 9.752327375133608e-06, "loss": 17.8793, "step": 6981 }, { "epoch": 0.12762534958963204, "grad_norm": 8.354632150166884, "learning_rate": 9.752235357495307e-06, "loss": 18.566, "step": 6982 }, { "epoch": 0.12764362878607857, "grad_norm": 5.616048601322087, "learning_rate": 9.752143323200837e-06, "loss": 17.2041, "step": 6983 }, { "epoch": 0.12766190798252508, "grad_norm": 7.053696647062669, "learning_rate": 9.752051272250516e-06, "loss": 17.7545, "step": 6984 }, { "epoch": 0.12768018717897162, "grad_norm": 7.646423507021111, "learning_rate": 9.751959204644665e-06, "loss": 18.1997, "step": 6985 }, { "epoch": 0.12769846637541812, "grad_norm": 7.470353436516299, "learning_rate": 9.751867120383611e-06, "loss": 18.1901, "step": 6986 }, { "epoch": 0.12771674557186466, "grad_norm": 5.582291147049489, "learning_rate": 9.751775019467677e-06, "loss": 17.353, "step": 6987 }, { "epoch": 0.1277350247683112, "grad_norm": 7.072988608141418, "learning_rate": 9.751682901897181e-06, "loss": 17.7201, "step": 6988 }, { "epoch": 0.1277533039647577, "grad_norm": 7.051201083526724, "learning_rate": 9.751590767672451e-06, "loss": 17.6554, "step": 6989 }, { "epoch": 0.12777158316120424, "grad_norm": 8.399054978627413, "learning_rate": 9.751498616793806e-06, "loss": 18.7966, "step": 6990 }, { "epoch": 0.12778986235765075, "grad_norm": 7.191940606262273, "learning_rate": 9.751406449261572e-06, "loss": 18.2619, "step": 6991 }, { "epoch": 0.12780814155409728, "grad_norm": 7.391879516430415, "learning_rate": 9.75131426507607e-06, "loss": 17.8321, "step": 6992 }, { "epoch": 0.12782642075054382, "grad_norm": 6.499690438607266, "learning_rate": 9.751222064237624e-06, "loss": 17.6385, "step": 6993 }, { "epoch": 0.12784469994699033, "grad_norm": 7.034250916249036, "learning_rate": 9.751129846746557e-06, "loss": 17.7252, "step": 6994 }, { "epoch": 0.12786297914343686, "grad_norm": 6.4572172651760065, "learning_rate": 9.75103761260319e-06, "loss": 17.5323, "step": 6995 }, { "epoch": 0.12788125833988337, "grad_norm": 6.04905851054078, "learning_rate": 9.750945361807852e-06, "loss": 17.4104, "step": 6996 }, { "epoch": 0.1278995375363299, "grad_norm": 6.133914125359956, "learning_rate": 9.750853094360861e-06, "loss": 17.4268, "step": 6997 }, { "epoch": 0.12791781673277644, "grad_norm": 5.781869773694671, "learning_rate": 9.750760810262543e-06, "loss": 17.2417, "step": 6998 }, { "epoch": 0.12793609592922295, "grad_norm": 6.7826479384989, "learning_rate": 9.750668509513219e-06, "loss": 17.4406, "step": 6999 }, { "epoch": 0.12795437512566948, "grad_norm": 8.786267572688896, "learning_rate": 9.750576192113216e-06, "loss": 18.801, "step": 7000 }, { "epoch": 0.127972654322116, "grad_norm": 5.910149796637377, "learning_rate": 9.750483858062856e-06, "loss": 17.5747, "step": 7001 }, { "epoch": 0.12799093351856253, "grad_norm": 8.012497036080479, "learning_rate": 9.75039150736246e-06, "loss": 18.2773, "step": 7002 }, { "epoch": 0.12800921271500904, "grad_norm": 6.451909566644818, "learning_rate": 9.750299140012355e-06, "loss": 17.5821, "step": 7003 }, { "epoch": 0.12802749191145557, "grad_norm": 7.423518602264178, "learning_rate": 9.750206756012864e-06, "loss": 17.8494, "step": 7004 }, { "epoch": 0.1280457711079021, "grad_norm": 8.361455105057958, "learning_rate": 9.750114355364311e-06, "loss": 18.4579, "step": 7005 }, { "epoch": 0.12806405030434861, "grad_norm": 7.878516491188714, "learning_rate": 9.750021938067018e-06, "loss": 18.3428, "step": 7006 }, { "epoch": 0.12808232950079515, "grad_norm": 7.737349443881559, "learning_rate": 9.74992950412131e-06, "loss": 17.9246, "step": 7007 }, { "epoch": 0.12810060869724166, "grad_norm": 6.69412927981068, "learning_rate": 9.749837053527512e-06, "loss": 17.9627, "step": 7008 }, { "epoch": 0.1281188878936882, "grad_norm": 8.423308730240684, "learning_rate": 9.749744586285948e-06, "loss": 18.1873, "step": 7009 }, { "epoch": 0.12813716709013473, "grad_norm": 5.8435805157842475, "learning_rate": 9.749652102396942e-06, "loss": 17.3101, "step": 7010 }, { "epoch": 0.12815544628658124, "grad_norm": 6.086695525064017, "learning_rate": 9.749559601860816e-06, "loss": 17.3515, "step": 7011 }, { "epoch": 0.12817372548302777, "grad_norm": 8.287445424367927, "learning_rate": 9.749467084677896e-06, "loss": 18.5489, "step": 7012 }, { "epoch": 0.12819200467947428, "grad_norm": 6.899548081555345, "learning_rate": 9.749374550848506e-06, "loss": 17.7959, "step": 7013 }, { "epoch": 0.12821028387592082, "grad_norm": 6.732953731533117, "learning_rate": 9.74928200037297e-06, "loss": 17.4766, "step": 7014 }, { "epoch": 0.12822856307236735, "grad_norm": 6.580377041773626, "learning_rate": 9.749189433251614e-06, "loss": 17.704, "step": 7015 }, { "epoch": 0.12824684226881386, "grad_norm": 9.179270160637689, "learning_rate": 9.74909684948476e-06, "loss": 18.4665, "step": 7016 }, { "epoch": 0.1282651214652604, "grad_norm": 7.6005357688836455, "learning_rate": 9.749004249072735e-06, "loss": 17.8569, "step": 7017 }, { "epoch": 0.1282834006617069, "grad_norm": 6.72430777417885, "learning_rate": 9.74891163201586e-06, "loss": 17.7693, "step": 7018 }, { "epoch": 0.12830167985815344, "grad_norm": 7.187779294795083, "learning_rate": 9.748818998314465e-06, "loss": 17.799, "step": 7019 }, { "epoch": 0.12831995905459995, "grad_norm": 8.76545881250089, "learning_rate": 9.748726347968868e-06, "loss": 17.7423, "step": 7020 }, { "epoch": 0.12833823825104648, "grad_norm": 6.881822173934101, "learning_rate": 9.7486336809794e-06, "loss": 17.5504, "step": 7021 }, { "epoch": 0.12835651744749302, "grad_norm": 6.769410542320792, "learning_rate": 9.748540997346382e-06, "loss": 17.7062, "step": 7022 }, { "epoch": 0.12837479664393953, "grad_norm": 8.1003885379815, "learning_rate": 9.74844829707014e-06, "loss": 18.3501, "step": 7023 }, { "epoch": 0.12839307584038606, "grad_norm": 7.0562596310985874, "learning_rate": 9.748355580150999e-06, "loss": 18.0205, "step": 7024 }, { "epoch": 0.12841135503683257, "grad_norm": 6.034469589388568, "learning_rate": 9.748262846589282e-06, "loss": 17.2974, "step": 7025 }, { "epoch": 0.1284296342332791, "grad_norm": 7.393384845502981, "learning_rate": 9.748170096385316e-06, "loss": 17.7875, "step": 7026 }, { "epoch": 0.12844791342972564, "grad_norm": 7.7241030594842695, "learning_rate": 9.748077329539428e-06, "loss": 18.3198, "step": 7027 }, { "epoch": 0.12846619262617215, "grad_norm": 7.704276417514002, "learning_rate": 9.747984546051941e-06, "loss": 17.7201, "step": 7028 }, { "epoch": 0.12848447182261868, "grad_norm": 7.267082095675214, "learning_rate": 9.747891745923177e-06, "loss": 17.913, "step": 7029 }, { "epoch": 0.1285027510190652, "grad_norm": 6.5439131308136735, "learning_rate": 9.747798929153467e-06, "loss": 17.6853, "step": 7030 }, { "epoch": 0.12852103021551173, "grad_norm": 7.366102539188293, "learning_rate": 9.747706095743134e-06, "loss": 17.9143, "step": 7031 }, { "epoch": 0.12853930941195826, "grad_norm": 7.780048607840062, "learning_rate": 9.747613245692503e-06, "loss": 17.9475, "step": 7032 }, { "epoch": 0.12855758860840477, "grad_norm": 9.331706422813205, "learning_rate": 9.747520379001898e-06, "loss": 18.4692, "step": 7033 }, { "epoch": 0.1285758678048513, "grad_norm": 6.554863952948978, "learning_rate": 9.747427495671646e-06, "loss": 17.8817, "step": 7034 }, { "epoch": 0.12859414700129781, "grad_norm": 6.619621923388074, "learning_rate": 9.747334595702073e-06, "loss": 17.5481, "step": 7035 }, { "epoch": 0.12861242619774435, "grad_norm": 7.2867554019539496, "learning_rate": 9.747241679093506e-06, "loss": 17.7352, "step": 7036 }, { "epoch": 0.12863070539419086, "grad_norm": 6.168823558666478, "learning_rate": 9.747148745846266e-06, "loss": 17.3254, "step": 7037 }, { "epoch": 0.1286489845906374, "grad_norm": 7.343672397809512, "learning_rate": 9.747055795960685e-06, "loss": 17.8599, "step": 7038 }, { "epoch": 0.12866726378708393, "grad_norm": 7.788202047403616, "learning_rate": 9.746962829437084e-06, "loss": 17.5477, "step": 7039 }, { "epoch": 0.12868554298353044, "grad_norm": 7.753770133924901, "learning_rate": 9.746869846275788e-06, "loss": 18.0494, "step": 7040 }, { "epoch": 0.12870382217997697, "grad_norm": 7.254213768272908, "learning_rate": 9.746776846477127e-06, "loss": 18.1565, "step": 7041 }, { "epoch": 0.12872210137642348, "grad_norm": 5.482515321584168, "learning_rate": 9.746683830041425e-06, "loss": 16.9708, "step": 7042 }, { "epoch": 0.12874038057287002, "grad_norm": 8.073657743389086, "learning_rate": 9.746590796969009e-06, "loss": 18.122, "step": 7043 }, { "epoch": 0.12875865976931655, "grad_norm": 6.337742609991647, "learning_rate": 9.746497747260202e-06, "loss": 17.7471, "step": 7044 }, { "epoch": 0.12877693896576306, "grad_norm": 6.861792871142012, "learning_rate": 9.746404680915334e-06, "loss": 17.674, "step": 7045 }, { "epoch": 0.1287952181622096, "grad_norm": 6.073323009333164, "learning_rate": 9.746311597934729e-06, "loss": 17.5672, "step": 7046 }, { "epoch": 0.1288134973586561, "grad_norm": 7.737803858797643, "learning_rate": 9.746218498318713e-06, "loss": 17.9827, "step": 7047 }, { "epoch": 0.12883177655510264, "grad_norm": 6.595915249149195, "learning_rate": 9.746125382067614e-06, "loss": 17.655, "step": 7048 }, { "epoch": 0.12885005575154918, "grad_norm": 7.94492656241729, "learning_rate": 9.746032249181755e-06, "loss": 17.9237, "step": 7049 }, { "epoch": 0.12886833494799568, "grad_norm": 7.785146945906904, "learning_rate": 9.745939099661467e-06, "loss": 18.0476, "step": 7050 }, { "epoch": 0.12888661414444222, "grad_norm": 7.299852152564878, "learning_rate": 9.745845933507075e-06, "loss": 18.0855, "step": 7051 }, { "epoch": 0.12890489334088873, "grad_norm": 6.080794940359898, "learning_rate": 9.745752750718904e-06, "loss": 17.081, "step": 7052 }, { "epoch": 0.12892317253733526, "grad_norm": 9.400370378916696, "learning_rate": 9.745659551297282e-06, "loss": 18.9966, "step": 7053 }, { "epoch": 0.12894145173378177, "grad_norm": 6.607722833662794, "learning_rate": 9.745566335242534e-06, "loss": 17.7522, "step": 7054 }, { "epoch": 0.1289597309302283, "grad_norm": 5.881197931608681, "learning_rate": 9.745473102554988e-06, "loss": 17.0914, "step": 7055 }, { "epoch": 0.12897801012667484, "grad_norm": 8.8913460947489, "learning_rate": 9.74537985323497e-06, "loss": 18.1216, "step": 7056 }, { "epoch": 0.12899628932312135, "grad_norm": 7.533926893007955, "learning_rate": 9.74528658728281e-06, "loss": 18.1686, "step": 7057 }, { "epoch": 0.12901456851956788, "grad_norm": 7.346160191812552, "learning_rate": 9.74519330469883e-06, "loss": 17.9362, "step": 7058 }, { "epoch": 0.1290328477160144, "grad_norm": 6.833229114822008, "learning_rate": 9.745100005483359e-06, "loss": 17.6809, "step": 7059 }, { "epoch": 0.12905112691246093, "grad_norm": 9.286908375591532, "learning_rate": 9.745006689636725e-06, "loss": 18.9791, "step": 7060 }, { "epoch": 0.12906940610890746, "grad_norm": 7.635266165167685, "learning_rate": 9.744913357159253e-06, "loss": 17.998, "step": 7061 }, { "epoch": 0.12908768530535397, "grad_norm": 7.121177886747499, "learning_rate": 9.744820008051275e-06, "loss": 17.8321, "step": 7062 }, { "epoch": 0.1291059645018005, "grad_norm": 5.4608475466962965, "learning_rate": 9.744726642313112e-06, "loss": 17.3149, "step": 7063 }, { "epoch": 0.12912424369824702, "grad_norm": 7.152818697394675, "learning_rate": 9.744633259945093e-06, "loss": 17.515, "step": 7064 }, { "epoch": 0.12914252289469355, "grad_norm": 6.100990233000748, "learning_rate": 9.744539860947548e-06, "loss": 17.2829, "step": 7065 }, { "epoch": 0.1291608020911401, "grad_norm": 6.021616617687753, "learning_rate": 9.744446445320801e-06, "loss": 17.3312, "step": 7066 }, { "epoch": 0.1291790812875866, "grad_norm": 6.535681209185815, "learning_rate": 9.744353013065183e-06, "loss": 17.4638, "step": 7067 }, { "epoch": 0.12919736048403313, "grad_norm": 7.784917741765711, "learning_rate": 9.744259564181016e-06, "loss": 18.0221, "step": 7068 }, { "epoch": 0.12921563968047964, "grad_norm": 7.617343393835883, "learning_rate": 9.744166098668635e-06, "loss": 17.9527, "step": 7069 }, { "epoch": 0.12923391887692617, "grad_norm": 7.90601165287111, "learning_rate": 9.74407261652836e-06, "loss": 18.1686, "step": 7070 }, { "epoch": 0.12925219807337268, "grad_norm": 7.008037531889261, "learning_rate": 9.743979117760525e-06, "loss": 17.8141, "step": 7071 }, { "epoch": 0.12927047726981922, "grad_norm": 8.37047423130954, "learning_rate": 9.743885602365453e-06, "loss": 18.0465, "step": 7072 }, { "epoch": 0.12928875646626575, "grad_norm": 6.9934475487925525, "learning_rate": 9.743792070343474e-06, "loss": 17.8282, "step": 7073 }, { "epoch": 0.12930703566271226, "grad_norm": 7.237583451545216, "learning_rate": 9.743698521694915e-06, "loss": 17.7588, "step": 7074 }, { "epoch": 0.1293253148591588, "grad_norm": 7.070649931872262, "learning_rate": 9.743604956420105e-06, "loss": 17.8541, "step": 7075 }, { "epoch": 0.1293435940556053, "grad_norm": 7.078642539692684, "learning_rate": 9.743511374519371e-06, "loss": 17.8196, "step": 7076 }, { "epoch": 0.12936187325205184, "grad_norm": 7.02475733593393, "learning_rate": 9.743417775993041e-06, "loss": 17.5844, "step": 7077 }, { "epoch": 0.12938015244849838, "grad_norm": 9.37372271448173, "learning_rate": 9.743324160841444e-06, "loss": 18.5396, "step": 7078 }, { "epoch": 0.12939843164494488, "grad_norm": 8.236034768064512, "learning_rate": 9.743230529064906e-06, "loss": 18.2293, "step": 7079 }, { "epoch": 0.12941671084139142, "grad_norm": 8.536656548092834, "learning_rate": 9.743136880663759e-06, "loss": 17.7928, "step": 7080 }, { "epoch": 0.12943499003783793, "grad_norm": 6.649512660286544, "learning_rate": 9.743043215638328e-06, "loss": 17.7391, "step": 7081 }, { "epoch": 0.12945326923428446, "grad_norm": 6.926789336046934, "learning_rate": 9.742949533988942e-06, "loss": 17.7954, "step": 7082 }, { "epoch": 0.129471548430731, "grad_norm": 7.691534260735286, "learning_rate": 9.742855835715928e-06, "loss": 18.1081, "step": 7083 }, { "epoch": 0.1294898276271775, "grad_norm": 7.150770709615593, "learning_rate": 9.742762120819618e-06, "loss": 17.947, "step": 7084 }, { "epoch": 0.12950810682362404, "grad_norm": 6.809459253562558, "learning_rate": 9.742668389300335e-06, "loss": 17.5234, "step": 7085 }, { "epoch": 0.12952638602007055, "grad_norm": 6.585411731518869, "learning_rate": 9.742574641158414e-06, "loss": 17.5973, "step": 7086 }, { "epoch": 0.12954466521651709, "grad_norm": 7.445803412206817, "learning_rate": 9.74248087639418e-06, "loss": 17.6825, "step": 7087 }, { "epoch": 0.1295629444129636, "grad_norm": 7.96816585816933, "learning_rate": 9.742387095007962e-06, "loss": 18.1658, "step": 7088 }, { "epoch": 0.12958122360941013, "grad_norm": 8.622071128910187, "learning_rate": 9.742293297000088e-06, "loss": 18.2712, "step": 7089 }, { "epoch": 0.12959950280585666, "grad_norm": 6.31027197922364, "learning_rate": 9.74219948237089e-06, "loss": 17.6311, "step": 7090 }, { "epoch": 0.12961778200230317, "grad_norm": 6.698700480938034, "learning_rate": 9.742105651120691e-06, "loss": 17.1604, "step": 7091 }, { "epoch": 0.1296360611987497, "grad_norm": 8.636028796784073, "learning_rate": 9.742011803249824e-06, "loss": 18.434, "step": 7092 }, { "epoch": 0.12965434039519622, "grad_norm": 7.473634804013628, "learning_rate": 9.741917938758617e-06, "loss": 17.9458, "step": 7093 }, { "epoch": 0.12967261959164275, "grad_norm": 8.03960125708875, "learning_rate": 9.7418240576474e-06, "loss": 18.0053, "step": 7094 }, { "epoch": 0.1296908987880893, "grad_norm": 6.118158265189932, "learning_rate": 9.7417301599165e-06, "loss": 17.3436, "step": 7095 }, { "epoch": 0.1297091779845358, "grad_norm": 7.823548294120425, "learning_rate": 9.741636245566248e-06, "loss": 18.0909, "step": 7096 }, { "epoch": 0.12972745718098233, "grad_norm": 8.919167180196236, "learning_rate": 9.741542314596973e-06, "loss": 18.4706, "step": 7097 }, { "epoch": 0.12974573637742884, "grad_norm": 6.797446443118667, "learning_rate": 9.741448367009003e-06, "loss": 17.5313, "step": 7098 }, { "epoch": 0.12976401557387537, "grad_norm": 6.891792027736653, "learning_rate": 9.741354402802668e-06, "loss": 17.7328, "step": 7099 }, { "epoch": 0.1297822947703219, "grad_norm": 6.24232287085528, "learning_rate": 9.741260421978297e-06, "loss": 17.4414, "step": 7100 }, { "epoch": 0.12980057396676842, "grad_norm": 8.769541465073212, "learning_rate": 9.74116642453622e-06, "loss": 18.6885, "step": 7101 }, { "epoch": 0.12981885316321495, "grad_norm": 7.405110796041737, "learning_rate": 9.741072410476766e-06, "loss": 17.6396, "step": 7102 }, { "epoch": 0.12983713235966146, "grad_norm": 6.332131906245619, "learning_rate": 9.740978379800265e-06, "loss": 17.5006, "step": 7103 }, { "epoch": 0.129855411556108, "grad_norm": 8.541532411360873, "learning_rate": 9.740884332507045e-06, "loss": 18.0133, "step": 7104 }, { "epoch": 0.1298736907525545, "grad_norm": 6.965117238326117, "learning_rate": 9.740790268597438e-06, "loss": 17.6445, "step": 7105 }, { "epoch": 0.12989196994900104, "grad_norm": 7.739700403058189, "learning_rate": 9.740696188071772e-06, "loss": 17.6502, "step": 7106 }, { "epoch": 0.12991024914544758, "grad_norm": 10.651807663877248, "learning_rate": 9.740602090930378e-06, "loss": 18.537, "step": 7107 }, { "epoch": 0.12992852834189408, "grad_norm": 7.011919563083695, "learning_rate": 9.740507977173585e-06, "loss": 17.766, "step": 7108 }, { "epoch": 0.12994680753834062, "grad_norm": 6.805176369720356, "learning_rate": 9.740413846801722e-06, "loss": 17.5963, "step": 7109 }, { "epoch": 0.12996508673478713, "grad_norm": 6.300468268748072, "learning_rate": 9.74031969981512e-06, "loss": 17.3778, "step": 7110 }, { "epoch": 0.12998336593123366, "grad_norm": 6.625256291228368, "learning_rate": 9.740225536214108e-06, "loss": 17.4592, "step": 7111 }, { "epoch": 0.1300016451276802, "grad_norm": 6.53112538703904, "learning_rate": 9.740131355999018e-06, "loss": 17.4195, "step": 7112 }, { "epoch": 0.1300199243241267, "grad_norm": 7.174879385056336, "learning_rate": 9.740037159170179e-06, "loss": 17.6717, "step": 7113 }, { "epoch": 0.13003820352057324, "grad_norm": 7.357221230274843, "learning_rate": 9.73994294572792e-06, "loss": 17.7064, "step": 7114 }, { "epoch": 0.13005648271701975, "grad_norm": 7.222772844113478, "learning_rate": 9.739848715672573e-06, "loss": 17.7126, "step": 7115 }, { "epoch": 0.13007476191346629, "grad_norm": 8.832836088421047, "learning_rate": 9.739754469004467e-06, "loss": 18.5505, "step": 7116 }, { "epoch": 0.13009304110991282, "grad_norm": 7.159356110822697, "learning_rate": 9.739660205723935e-06, "loss": 18.1078, "step": 7117 }, { "epoch": 0.13011132030635933, "grad_norm": 7.163747630000917, "learning_rate": 9.739565925831304e-06, "loss": 17.7092, "step": 7118 }, { "epoch": 0.13012959950280586, "grad_norm": 8.932830491978832, "learning_rate": 9.739471629326904e-06, "loss": 18.3991, "step": 7119 }, { "epoch": 0.13014787869925237, "grad_norm": 7.314547240989594, "learning_rate": 9.73937731621107e-06, "loss": 17.7927, "step": 7120 }, { "epoch": 0.1301661578956989, "grad_norm": 7.43226134822261, "learning_rate": 9.73928298648413e-06, "loss": 18.1889, "step": 7121 }, { "epoch": 0.13018443709214542, "grad_norm": 6.246887550119452, "learning_rate": 9.73918864014641e-06, "loss": 17.4334, "step": 7122 }, { "epoch": 0.13020271628859195, "grad_norm": 6.4398750407548295, "learning_rate": 9.739094277198249e-06, "loss": 17.699, "step": 7123 }, { "epoch": 0.1302209954850385, "grad_norm": 6.860107349364731, "learning_rate": 9.738999897639973e-06, "loss": 17.8638, "step": 7124 }, { "epoch": 0.130239274681485, "grad_norm": 9.113527874042134, "learning_rate": 9.738905501471914e-06, "loss": 18.2573, "step": 7125 }, { "epoch": 0.13025755387793153, "grad_norm": 7.166982317668824, "learning_rate": 9.738811088694401e-06, "loss": 17.4839, "step": 7126 }, { "epoch": 0.13027583307437804, "grad_norm": 5.822801345459667, "learning_rate": 9.738716659307767e-06, "loss": 17.2673, "step": 7127 }, { "epoch": 0.13029411227082457, "grad_norm": 7.079865700002657, "learning_rate": 9.738622213312343e-06, "loss": 18.0549, "step": 7128 }, { "epoch": 0.1303123914672711, "grad_norm": 6.880181479310935, "learning_rate": 9.738527750708458e-06, "loss": 17.8285, "step": 7129 }, { "epoch": 0.13033067066371762, "grad_norm": 6.270793460597133, "learning_rate": 9.738433271496445e-06, "loss": 17.3228, "step": 7130 }, { "epoch": 0.13034894986016415, "grad_norm": 6.520184219414416, "learning_rate": 9.738338775676634e-06, "loss": 17.5676, "step": 7131 }, { "epoch": 0.13036722905661066, "grad_norm": 6.757513125586967, "learning_rate": 9.73824426324936e-06, "loss": 17.7184, "step": 7132 }, { "epoch": 0.1303855082530572, "grad_norm": 8.613481182446472, "learning_rate": 9.738149734214947e-06, "loss": 18.1121, "step": 7133 }, { "epoch": 0.13040378744950373, "grad_norm": 7.9340290228511705, "learning_rate": 9.738055188573731e-06, "loss": 18.2459, "step": 7134 }, { "epoch": 0.13042206664595024, "grad_norm": 7.213911441171221, "learning_rate": 9.737960626326044e-06, "loss": 17.7702, "step": 7135 }, { "epoch": 0.13044034584239678, "grad_norm": 7.1004579036797955, "learning_rate": 9.737866047472215e-06, "loss": 17.7682, "step": 7136 }, { "epoch": 0.13045862503884328, "grad_norm": 6.839243807506448, "learning_rate": 9.737771452012579e-06, "loss": 17.8289, "step": 7137 }, { "epoch": 0.13047690423528982, "grad_norm": 8.331470974201615, "learning_rate": 9.737676839947463e-06, "loss": 18.0984, "step": 7138 }, { "epoch": 0.13049518343173633, "grad_norm": 6.714694618846473, "learning_rate": 9.7375822112772e-06, "loss": 17.7931, "step": 7139 }, { "epoch": 0.13051346262818286, "grad_norm": 7.5737923207037205, "learning_rate": 9.737487566002126e-06, "loss": 17.827, "step": 7140 }, { "epoch": 0.1305317418246294, "grad_norm": 7.105649428705973, "learning_rate": 9.737392904122565e-06, "loss": 17.9188, "step": 7141 }, { "epoch": 0.1305500210210759, "grad_norm": 7.47606356144569, "learning_rate": 9.737298225638856e-06, "loss": 17.7877, "step": 7142 }, { "epoch": 0.13056830021752244, "grad_norm": 7.131386039255281, "learning_rate": 9.737203530551327e-06, "loss": 17.7167, "step": 7143 }, { "epoch": 0.13058657941396895, "grad_norm": 7.9805889492847255, "learning_rate": 9.73710881886031e-06, "loss": 18.4009, "step": 7144 }, { "epoch": 0.1306048586104155, "grad_norm": 7.448580722022492, "learning_rate": 9.737014090566138e-06, "loss": 17.9732, "step": 7145 }, { "epoch": 0.13062313780686202, "grad_norm": 7.4679063591013275, "learning_rate": 9.736919345669142e-06, "loss": 17.9908, "step": 7146 }, { "epoch": 0.13064141700330853, "grad_norm": 7.384603113967751, "learning_rate": 9.736824584169656e-06, "loss": 17.4913, "step": 7147 }, { "epoch": 0.13065969619975507, "grad_norm": 7.806417424820133, "learning_rate": 9.73672980606801e-06, "loss": 18.0279, "step": 7148 }, { "epoch": 0.13067797539620157, "grad_norm": 6.997974581913617, "learning_rate": 9.736635011364538e-06, "loss": 17.92, "step": 7149 }, { "epoch": 0.1306962545926481, "grad_norm": 7.478948956993409, "learning_rate": 9.736540200059572e-06, "loss": 17.796, "step": 7150 }, { "epoch": 0.13071453378909464, "grad_norm": 6.024958741105644, "learning_rate": 9.736445372153441e-06, "loss": 17.4914, "step": 7151 }, { "epoch": 0.13073281298554115, "grad_norm": 7.520078447002026, "learning_rate": 9.736350527646481e-06, "loss": 18.1718, "step": 7152 }, { "epoch": 0.1307510921819877, "grad_norm": 7.159633598905982, "learning_rate": 9.736255666539026e-06, "loss": 17.9506, "step": 7153 }, { "epoch": 0.1307693713784342, "grad_norm": 7.515554897738069, "learning_rate": 9.736160788831401e-06, "loss": 18.3908, "step": 7154 }, { "epoch": 0.13078765057488073, "grad_norm": 8.618024991764836, "learning_rate": 9.736065894523947e-06, "loss": 18.1717, "step": 7155 }, { "epoch": 0.13080592977132724, "grad_norm": 6.254382251240645, "learning_rate": 9.735970983616992e-06, "loss": 17.5342, "step": 7156 }, { "epoch": 0.13082420896777378, "grad_norm": 7.67952894743319, "learning_rate": 9.73587605611087e-06, "loss": 18.0999, "step": 7157 }, { "epoch": 0.1308424881642203, "grad_norm": 7.759082523709545, "learning_rate": 9.735781112005913e-06, "loss": 17.9696, "step": 7158 }, { "epoch": 0.13086076736066682, "grad_norm": 6.85593738247165, "learning_rate": 9.735686151302455e-06, "loss": 17.8862, "step": 7159 }, { "epoch": 0.13087904655711335, "grad_norm": 6.5318907715428685, "learning_rate": 9.735591174000828e-06, "loss": 17.4204, "step": 7160 }, { "epoch": 0.13089732575355986, "grad_norm": 7.0040496867619115, "learning_rate": 9.735496180101362e-06, "loss": 17.637, "step": 7161 }, { "epoch": 0.1309156049500064, "grad_norm": 5.7779722178513016, "learning_rate": 9.735401169604396e-06, "loss": 17.3363, "step": 7162 }, { "epoch": 0.13093388414645293, "grad_norm": 8.303287070527276, "learning_rate": 9.735306142510259e-06, "loss": 18.3085, "step": 7163 }, { "epoch": 0.13095216334289944, "grad_norm": 7.110065308053239, "learning_rate": 9.735211098819283e-06, "loss": 17.6274, "step": 7164 }, { "epoch": 0.13097044253934598, "grad_norm": 6.262788790202189, "learning_rate": 9.735116038531806e-06, "loss": 17.3397, "step": 7165 }, { "epoch": 0.13098872173579248, "grad_norm": 8.564939431946543, "learning_rate": 9.735020961648156e-06, "loss": 17.9833, "step": 7166 }, { "epoch": 0.13100700093223902, "grad_norm": 6.293016637755449, "learning_rate": 9.73492586816867e-06, "loss": 17.4136, "step": 7167 }, { "epoch": 0.13102528012868556, "grad_norm": 6.822811223665426, "learning_rate": 9.734830758093679e-06, "loss": 17.5242, "step": 7168 }, { "epoch": 0.13104355932513206, "grad_norm": 7.397815281411342, "learning_rate": 9.734735631423517e-06, "loss": 17.8625, "step": 7169 }, { "epoch": 0.1310618385215786, "grad_norm": 6.853785375824345, "learning_rate": 9.734640488158517e-06, "loss": 17.7668, "step": 7170 }, { "epoch": 0.1310801177180251, "grad_norm": 8.2618386970712, "learning_rate": 9.734545328299014e-06, "loss": 18.3031, "step": 7171 }, { "epoch": 0.13109839691447164, "grad_norm": 6.532337976922704, "learning_rate": 9.734450151845341e-06, "loss": 17.4171, "step": 7172 }, { "epoch": 0.13111667611091815, "grad_norm": 7.061745401124181, "learning_rate": 9.734354958797829e-06, "loss": 17.7711, "step": 7173 }, { "epoch": 0.1311349553073647, "grad_norm": 8.452499012569488, "learning_rate": 9.734259749156815e-06, "loss": 18.0384, "step": 7174 }, { "epoch": 0.13115323450381122, "grad_norm": 6.123261611912143, "learning_rate": 9.734164522922631e-06, "loss": 17.498, "step": 7175 }, { "epoch": 0.13117151370025773, "grad_norm": 7.517392231596765, "learning_rate": 9.73406928009561e-06, "loss": 17.7518, "step": 7176 }, { "epoch": 0.13118979289670427, "grad_norm": 7.742621226274448, "learning_rate": 9.733974020676089e-06, "loss": 18.1766, "step": 7177 }, { "epoch": 0.13120807209315077, "grad_norm": 7.387095435548084, "learning_rate": 9.7338787446644e-06, "loss": 17.6182, "step": 7178 }, { "epoch": 0.1312263512895973, "grad_norm": 7.446250508000315, "learning_rate": 9.733783452060874e-06, "loss": 18.1282, "step": 7179 }, { "epoch": 0.13124463048604385, "grad_norm": 7.5390636994354505, "learning_rate": 9.73368814286585e-06, "loss": 18.2006, "step": 7180 }, { "epoch": 0.13126290968249035, "grad_norm": 7.599898660645298, "learning_rate": 9.733592817079661e-06, "loss": 17.602, "step": 7181 }, { "epoch": 0.1312811888789369, "grad_norm": 6.262963191082417, "learning_rate": 9.733497474702638e-06, "loss": 17.0976, "step": 7182 }, { "epoch": 0.1312994680753834, "grad_norm": 6.14727093181255, "learning_rate": 9.733402115735117e-06, "loss": 17.4255, "step": 7183 }, { "epoch": 0.13131774727182993, "grad_norm": 5.7133178099579665, "learning_rate": 9.733306740177432e-06, "loss": 16.9744, "step": 7184 }, { "epoch": 0.13133602646827647, "grad_norm": 8.507051019811717, "learning_rate": 9.73321134802992e-06, "loss": 18.5356, "step": 7185 }, { "epoch": 0.13135430566472298, "grad_norm": 9.555240157396033, "learning_rate": 9.73311593929291e-06, "loss": 18.7287, "step": 7186 }, { "epoch": 0.1313725848611695, "grad_norm": 7.73195458476903, "learning_rate": 9.73302051396674e-06, "loss": 17.776, "step": 7187 }, { "epoch": 0.13139086405761602, "grad_norm": 6.3066851006271785, "learning_rate": 9.732925072051746e-06, "loss": 17.4364, "step": 7188 }, { "epoch": 0.13140914325406255, "grad_norm": 6.530512281586624, "learning_rate": 9.732829613548258e-06, "loss": 17.4735, "step": 7189 }, { "epoch": 0.13142742245050906, "grad_norm": 7.670285926957407, "learning_rate": 9.732734138456614e-06, "loss": 18.0228, "step": 7190 }, { "epoch": 0.1314457016469556, "grad_norm": 6.881575360490226, "learning_rate": 9.732638646777148e-06, "loss": 17.6759, "step": 7191 }, { "epoch": 0.13146398084340213, "grad_norm": 9.05563547716694, "learning_rate": 9.732543138510193e-06, "loss": 18.2275, "step": 7192 }, { "epoch": 0.13148226003984864, "grad_norm": 7.306381515657076, "learning_rate": 9.732447613656087e-06, "loss": 17.9175, "step": 7193 }, { "epoch": 0.13150053923629518, "grad_norm": 9.280282371695481, "learning_rate": 9.732352072215162e-06, "loss": 17.8701, "step": 7194 }, { "epoch": 0.13151881843274169, "grad_norm": 7.755610675155056, "learning_rate": 9.73225651418775e-06, "loss": 17.9723, "step": 7195 }, { "epoch": 0.13153709762918822, "grad_norm": 5.94216702467463, "learning_rate": 9.732160939574194e-06, "loss": 17.276, "step": 7196 }, { "epoch": 0.13155537682563476, "grad_norm": 5.8325769245965935, "learning_rate": 9.732065348374821e-06, "loss": 17.3216, "step": 7197 }, { "epoch": 0.13157365602208126, "grad_norm": 7.8918461462941405, "learning_rate": 9.731969740589972e-06, "loss": 17.9062, "step": 7198 }, { "epoch": 0.1315919352185278, "grad_norm": 5.9828504034800085, "learning_rate": 9.731874116219981e-06, "loss": 17.3003, "step": 7199 }, { "epoch": 0.1316102144149743, "grad_norm": 6.271695218333179, "learning_rate": 9.73177847526518e-06, "loss": 17.408, "step": 7200 }, { "epoch": 0.13162849361142084, "grad_norm": 6.884913084335879, "learning_rate": 9.731682817725907e-06, "loss": 17.5766, "step": 7201 }, { "epoch": 0.13164677280786738, "grad_norm": 6.887401816460929, "learning_rate": 9.731587143602494e-06, "loss": 17.8024, "step": 7202 }, { "epoch": 0.1316650520043139, "grad_norm": 6.825550600345836, "learning_rate": 9.731491452895281e-06, "loss": 17.7197, "step": 7203 }, { "epoch": 0.13168333120076042, "grad_norm": 8.208850888222933, "learning_rate": 9.7313957456046e-06, "loss": 18.3821, "step": 7204 }, { "epoch": 0.13170161039720693, "grad_norm": 8.074895936854027, "learning_rate": 9.731300021730787e-06, "loss": 18.7384, "step": 7205 }, { "epoch": 0.13171988959365347, "grad_norm": 7.821903652140027, "learning_rate": 9.731204281274178e-06, "loss": 18.4072, "step": 7206 }, { "epoch": 0.13173816879009997, "grad_norm": 7.682609503179762, "learning_rate": 9.73110852423511e-06, "loss": 17.8128, "step": 7207 }, { "epoch": 0.1317564479865465, "grad_norm": 6.782940292836674, "learning_rate": 9.731012750613918e-06, "loss": 17.7275, "step": 7208 }, { "epoch": 0.13177472718299305, "grad_norm": 6.679171848756854, "learning_rate": 9.730916960410934e-06, "loss": 17.4771, "step": 7209 }, { "epoch": 0.13179300637943955, "grad_norm": 10.636301648491877, "learning_rate": 9.730821153626497e-06, "loss": 17.6422, "step": 7210 }, { "epoch": 0.1318112855758861, "grad_norm": 7.2774302201233985, "learning_rate": 9.730725330260945e-06, "loss": 17.6334, "step": 7211 }, { "epoch": 0.1318295647723326, "grad_norm": 9.41703242932954, "learning_rate": 9.730629490314609e-06, "loss": 18.422, "step": 7212 }, { "epoch": 0.13184784396877913, "grad_norm": 6.869794181421306, "learning_rate": 9.730533633787827e-06, "loss": 17.5357, "step": 7213 }, { "epoch": 0.13186612316522567, "grad_norm": 8.155327972728386, "learning_rate": 9.730437760680936e-06, "loss": 18.3217, "step": 7214 }, { "epoch": 0.13188440236167218, "grad_norm": 7.624147658658768, "learning_rate": 9.73034187099427e-06, "loss": 17.7722, "step": 7215 }, { "epoch": 0.1319026815581187, "grad_norm": 7.420377534640088, "learning_rate": 9.730245964728167e-06, "loss": 17.9153, "step": 7216 }, { "epoch": 0.13192096075456522, "grad_norm": 8.872178765800202, "learning_rate": 9.730150041882962e-06, "loss": 18.5382, "step": 7217 }, { "epoch": 0.13193923995101176, "grad_norm": 7.320922652635634, "learning_rate": 9.730054102458992e-06, "loss": 18.0761, "step": 7218 }, { "epoch": 0.1319575191474583, "grad_norm": 7.280140268895857, "learning_rate": 9.729958146456593e-06, "loss": 17.4843, "step": 7219 }, { "epoch": 0.1319757983439048, "grad_norm": 7.547182011375449, "learning_rate": 9.729862173876102e-06, "loss": 17.7176, "step": 7220 }, { "epoch": 0.13199407754035133, "grad_norm": 7.300794574100374, "learning_rate": 9.729766184717853e-06, "loss": 17.5465, "step": 7221 }, { "epoch": 0.13201235673679784, "grad_norm": 7.611837634531434, "learning_rate": 9.729670178982184e-06, "loss": 17.9243, "step": 7222 }, { "epoch": 0.13203063593324438, "grad_norm": 7.081527995399049, "learning_rate": 9.729574156669433e-06, "loss": 17.7627, "step": 7223 }, { "epoch": 0.13204891512969089, "grad_norm": 8.915259132218761, "learning_rate": 9.729478117779933e-06, "loss": 18.1401, "step": 7224 }, { "epoch": 0.13206719432613742, "grad_norm": 8.292487473290121, "learning_rate": 9.729382062314023e-06, "loss": 18.0996, "step": 7225 }, { "epoch": 0.13208547352258396, "grad_norm": 6.296746454594278, "learning_rate": 9.72928599027204e-06, "loss": 17.5431, "step": 7226 }, { "epoch": 0.13210375271903047, "grad_norm": 7.2671164091935285, "learning_rate": 9.72918990165432e-06, "loss": 17.8938, "step": 7227 }, { "epoch": 0.132122031915477, "grad_norm": 6.9395733615132835, "learning_rate": 9.7290937964612e-06, "loss": 17.7086, "step": 7228 }, { "epoch": 0.1321403111119235, "grad_norm": 7.3315954949805295, "learning_rate": 9.728997674693015e-06, "loss": 17.3778, "step": 7229 }, { "epoch": 0.13215859030837004, "grad_norm": 7.9153149440768855, "learning_rate": 9.728901536350106e-06, "loss": 18.2457, "step": 7230 }, { "epoch": 0.13217686950481658, "grad_norm": 6.8914822685486925, "learning_rate": 9.728805381432805e-06, "loss": 17.5621, "step": 7231 }, { "epoch": 0.1321951487012631, "grad_norm": 8.83534302114429, "learning_rate": 9.728709209941453e-06, "loss": 18.7026, "step": 7232 }, { "epoch": 0.13221342789770962, "grad_norm": 6.966925026227877, "learning_rate": 9.728613021876385e-06, "loss": 17.4717, "step": 7233 }, { "epoch": 0.13223170709415613, "grad_norm": 6.289173813053085, "learning_rate": 9.728516817237939e-06, "loss": 17.4835, "step": 7234 }, { "epoch": 0.13224998629060267, "grad_norm": 6.135824237560181, "learning_rate": 9.72842059602645e-06, "loss": 17.3179, "step": 7235 }, { "epoch": 0.1322682654870492, "grad_norm": 6.6226126286860865, "learning_rate": 9.72832435824226e-06, "loss": 17.4737, "step": 7236 }, { "epoch": 0.1322865446834957, "grad_norm": 6.62079242021924, "learning_rate": 9.728228103885702e-06, "loss": 17.5764, "step": 7237 }, { "epoch": 0.13230482387994225, "grad_norm": 10.527562765818056, "learning_rate": 9.728131832957115e-06, "loss": 17.4243, "step": 7238 }, { "epoch": 0.13232310307638875, "grad_norm": 7.049840722526844, "learning_rate": 9.728035545456837e-06, "loss": 17.6683, "step": 7239 }, { "epoch": 0.1323413822728353, "grad_norm": 5.980215077656075, "learning_rate": 9.727939241385203e-06, "loss": 17.0616, "step": 7240 }, { "epoch": 0.1323596614692818, "grad_norm": 7.133091112035594, "learning_rate": 9.727842920742554e-06, "loss": 17.6764, "step": 7241 }, { "epoch": 0.13237794066572833, "grad_norm": 7.454956226368991, "learning_rate": 9.727746583529225e-06, "loss": 18.0583, "step": 7242 }, { "epoch": 0.13239621986217487, "grad_norm": 8.062267426384313, "learning_rate": 9.727650229745554e-06, "loss": 18.0237, "step": 7243 }, { "epoch": 0.13241449905862138, "grad_norm": 7.5373814766321, "learning_rate": 9.727553859391881e-06, "loss": 17.7991, "step": 7244 }, { "epoch": 0.1324327782550679, "grad_norm": 7.597588160329692, "learning_rate": 9.72745747246854e-06, "loss": 18.081, "step": 7245 }, { "epoch": 0.13245105745151442, "grad_norm": 6.602527071310561, "learning_rate": 9.727361068975871e-06, "loss": 17.7052, "step": 7246 }, { "epoch": 0.13246933664796096, "grad_norm": 6.3018292516121575, "learning_rate": 9.727264648914212e-06, "loss": 17.4789, "step": 7247 }, { "epoch": 0.1324876158444075, "grad_norm": 6.348523634478017, "learning_rate": 9.727168212283902e-06, "loss": 17.2301, "step": 7248 }, { "epoch": 0.132505895040854, "grad_norm": 7.582114574713264, "learning_rate": 9.727071759085275e-06, "loss": 18.2832, "step": 7249 }, { "epoch": 0.13252417423730053, "grad_norm": 6.408570717052727, "learning_rate": 9.726975289318674e-06, "loss": 17.2156, "step": 7250 }, { "epoch": 0.13254245343374704, "grad_norm": 6.670369978913939, "learning_rate": 9.726878802984434e-06, "loss": 17.6147, "step": 7251 }, { "epoch": 0.13256073263019358, "grad_norm": 8.150834883500712, "learning_rate": 9.726782300082893e-06, "loss": 17.8667, "step": 7252 }, { "epoch": 0.13257901182664011, "grad_norm": 8.54348521469152, "learning_rate": 9.72668578061439e-06, "loss": 19.1827, "step": 7253 }, { "epoch": 0.13259729102308662, "grad_norm": 7.161062456562297, "learning_rate": 9.726589244579265e-06, "loss": 17.9603, "step": 7254 }, { "epoch": 0.13261557021953316, "grad_norm": 6.987094257255618, "learning_rate": 9.726492691977856e-06, "loss": 17.6345, "step": 7255 }, { "epoch": 0.13263384941597967, "grad_norm": 7.33513881872116, "learning_rate": 9.726396122810497e-06, "loss": 17.6273, "step": 7256 }, { "epoch": 0.1326521286124262, "grad_norm": 5.771420058036647, "learning_rate": 9.726299537077533e-06, "loss": 17.1635, "step": 7257 }, { "epoch": 0.1326704078088727, "grad_norm": 5.634709424961658, "learning_rate": 9.726202934779297e-06, "loss": 16.9322, "step": 7258 }, { "epoch": 0.13268868700531924, "grad_norm": 6.113352690103021, "learning_rate": 9.726106315916131e-06, "loss": 17.2111, "step": 7259 }, { "epoch": 0.13270696620176578, "grad_norm": 7.089025293622658, "learning_rate": 9.726009680488371e-06, "loss": 17.8984, "step": 7260 }, { "epoch": 0.1327252453982123, "grad_norm": 6.380160783041163, "learning_rate": 9.725913028496359e-06, "loss": 17.4098, "step": 7261 }, { "epoch": 0.13274352459465882, "grad_norm": 7.805516414406185, "learning_rate": 9.72581635994043e-06, "loss": 18.1524, "step": 7262 }, { "epoch": 0.13276180379110533, "grad_norm": 6.972621356864681, "learning_rate": 9.725719674820926e-06, "loss": 17.4139, "step": 7263 }, { "epoch": 0.13278008298755187, "grad_norm": 6.571282474587021, "learning_rate": 9.725622973138185e-06, "loss": 17.2932, "step": 7264 }, { "epoch": 0.1327983621839984, "grad_norm": 7.598398266168324, "learning_rate": 9.725526254892544e-06, "loss": 18.0883, "step": 7265 }, { "epoch": 0.1328166413804449, "grad_norm": 8.096978763128138, "learning_rate": 9.725429520084345e-06, "loss": 18.2204, "step": 7266 }, { "epoch": 0.13283492057689145, "grad_norm": 7.3218974170460225, "learning_rate": 9.725332768713924e-06, "loss": 18.1063, "step": 7267 }, { "epoch": 0.13285319977333795, "grad_norm": 6.885678643390233, "learning_rate": 9.725236000781623e-06, "loss": 17.9394, "step": 7268 }, { "epoch": 0.1328714789697845, "grad_norm": 6.873700299264502, "learning_rate": 9.72513921628778e-06, "loss": 17.9246, "step": 7269 }, { "epoch": 0.13288975816623103, "grad_norm": 6.7128758381037725, "learning_rate": 9.725042415232734e-06, "loss": 17.5954, "step": 7270 }, { "epoch": 0.13290803736267753, "grad_norm": 12.04240883480532, "learning_rate": 9.724945597616824e-06, "loss": 17.5589, "step": 7271 }, { "epoch": 0.13292631655912407, "grad_norm": 5.888560453507167, "learning_rate": 9.724848763440389e-06, "loss": 17.2392, "step": 7272 }, { "epoch": 0.13294459575557058, "grad_norm": 5.959386286115697, "learning_rate": 9.72475191270377e-06, "loss": 17.0283, "step": 7273 }, { "epoch": 0.1329628749520171, "grad_norm": 7.266865757445522, "learning_rate": 9.724655045407306e-06, "loss": 18.0163, "step": 7274 }, { "epoch": 0.13298115414846362, "grad_norm": 8.959452733145644, "learning_rate": 9.724558161551335e-06, "loss": 18.9249, "step": 7275 }, { "epoch": 0.13299943334491016, "grad_norm": 7.953370336785721, "learning_rate": 9.724461261136198e-06, "loss": 18.1367, "step": 7276 }, { "epoch": 0.1330177125413567, "grad_norm": 6.98834233648399, "learning_rate": 9.724364344162234e-06, "loss": 18.0126, "step": 7277 }, { "epoch": 0.1330359917378032, "grad_norm": 8.68664005119283, "learning_rate": 9.724267410629785e-06, "loss": 18.7515, "step": 7278 }, { "epoch": 0.13305427093424974, "grad_norm": 7.220871982516756, "learning_rate": 9.724170460539185e-06, "loss": 17.8004, "step": 7279 }, { "epoch": 0.13307255013069624, "grad_norm": 7.41225811833119, "learning_rate": 9.72407349389078e-06, "loss": 17.9176, "step": 7280 }, { "epoch": 0.13309082932714278, "grad_norm": 9.264816320895811, "learning_rate": 9.723976510684907e-06, "loss": 18.2521, "step": 7281 }, { "epoch": 0.13310910852358931, "grad_norm": 6.796793887604724, "learning_rate": 9.723879510921904e-06, "loss": 17.8195, "step": 7282 }, { "epoch": 0.13312738772003582, "grad_norm": 6.742387205753071, "learning_rate": 9.723782494602117e-06, "loss": 17.3863, "step": 7283 }, { "epoch": 0.13314566691648236, "grad_norm": 7.413465376909987, "learning_rate": 9.72368546172588e-06, "loss": 17.8029, "step": 7284 }, { "epoch": 0.13316394611292887, "grad_norm": 7.306590371199734, "learning_rate": 9.723588412293536e-06, "loss": 17.8976, "step": 7285 }, { "epoch": 0.1331822253093754, "grad_norm": 7.805619937583306, "learning_rate": 9.723491346305426e-06, "loss": 18.1022, "step": 7286 }, { "epoch": 0.13320050450582194, "grad_norm": 9.380610587391823, "learning_rate": 9.723394263761885e-06, "loss": 18.6342, "step": 7287 }, { "epoch": 0.13321878370226845, "grad_norm": 7.2669165607591655, "learning_rate": 9.72329716466326e-06, "loss": 17.7078, "step": 7288 }, { "epoch": 0.13323706289871498, "grad_norm": 6.1314312177878065, "learning_rate": 9.723200049009886e-06, "loss": 17.2683, "step": 7289 }, { "epoch": 0.1332553420951615, "grad_norm": 7.459467060270579, "learning_rate": 9.723102916802108e-06, "loss": 17.8149, "step": 7290 }, { "epoch": 0.13327362129160802, "grad_norm": 5.240316852090448, "learning_rate": 9.723005768040264e-06, "loss": 16.8855, "step": 7291 }, { "epoch": 0.13329190048805453, "grad_norm": 6.234999252504696, "learning_rate": 9.722908602724693e-06, "loss": 17.2466, "step": 7292 }, { "epoch": 0.13331017968450107, "grad_norm": 7.8177911789367, "learning_rate": 9.722811420855738e-06, "loss": 18.1054, "step": 7293 }, { "epoch": 0.1333284588809476, "grad_norm": 7.301076125871522, "learning_rate": 9.722714222433738e-06, "loss": 17.9058, "step": 7294 }, { "epoch": 0.1333467380773941, "grad_norm": 7.592960509600471, "learning_rate": 9.722617007459037e-06, "loss": 18.154, "step": 7295 }, { "epoch": 0.13336501727384065, "grad_norm": 6.44851950623422, "learning_rate": 9.72251977593197e-06, "loss": 17.6372, "step": 7296 }, { "epoch": 0.13338329647028715, "grad_norm": 6.131893496868285, "learning_rate": 9.722422527852883e-06, "loss": 17.4133, "step": 7297 }, { "epoch": 0.1334015756667337, "grad_norm": 7.4913639236958, "learning_rate": 9.722325263222114e-06, "loss": 17.7869, "step": 7298 }, { "epoch": 0.13341985486318023, "grad_norm": 8.281805478945252, "learning_rate": 9.722227982040004e-06, "loss": 18.1848, "step": 7299 }, { "epoch": 0.13343813405962673, "grad_norm": 8.013137361924885, "learning_rate": 9.722130684306897e-06, "loss": 18.0568, "step": 7300 }, { "epoch": 0.13345641325607327, "grad_norm": 6.442051320790764, "learning_rate": 9.722033370023129e-06, "loss": 17.4183, "step": 7301 }, { "epoch": 0.13347469245251978, "grad_norm": 7.030411239913883, "learning_rate": 9.721936039189046e-06, "loss": 17.4873, "step": 7302 }, { "epoch": 0.1334929716489663, "grad_norm": 8.713878967086993, "learning_rate": 9.721838691804986e-06, "loss": 18.4822, "step": 7303 }, { "epoch": 0.13351125084541285, "grad_norm": 7.3964301208580014, "learning_rate": 9.721741327871291e-06, "loss": 17.7409, "step": 7304 }, { "epoch": 0.13352953004185936, "grad_norm": 6.409802596344187, "learning_rate": 9.721643947388304e-06, "loss": 17.4356, "step": 7305 }, { "epoch": 0.1335478092383059, "grad_norm": 6.553199538093425, "learning_rate": 9.721546550356362e-06, "loss": 17.635, "step": 7306 }, { "epoch": 0.1335660884347524, "grad_norm": 7.855535169751103, "learning_rate": 9.721449136775811e-06, "loss": 17.5149, "step": 7307 }, { "epoch": 0.13358436763119894, "grad_norm": 7.926555715016358, "learning_rate": 9.72135170664699e-06, "loss": 17.9294, "step": 7308 }, { "epoch": 0.13360264682764544, "grad_norm": 8.247210705388246, "learning_rate": 9.721254259970241e-06, "loss": 17.9499, "step": 7309 }, { "epoch": 0.13362092602409198, "grad_norm": 6.916463039840437, "learning_rate": 9.721156796745905e-06, "loss": 17.5853, "step": 7310 }, { "epoch": 0.13363920522053852, "grad_norm": 8.408292063266318, "learning_rate": 9.721059316974324e-06, "loss": 17.7235, "step": 7311 }, { "epoch": 0.13365748441698502, "grad_norm": 6.352660543096414, "learning_rate": 9.72096182065584e-06, "loss": 17.5232, "step": 7312 }, { "epoch": 0.13367576361343156, "grad_norm": 7.349600166175085, "learning_rate": 9.720864307790796e-06, "loss": 18.1619, "step": 7313 }, { "epoch": 0.13369404280987807, "grad_norm": 7.874987001024496, "learning_rate": 9.720766778379531e-06, "loss": 18.2057, "step": 7314 }, { "epoch": 0.1337123220063246, "grad_norm": 7.334889029040655, "learning_rate": 9.720669232422388e-06, "loss": 17.6721, "step": 7315 }, { "epoch": 0.13373060120277114, "grad_norm": 6.819419836540446, "learning_rate": 9.72057166991971e-06, "loss": 17.7121, "step": 7316 }, { "epoch": 0.13374888039921765, "grad_norm": 6.804240942531425, "learning_rate": 9.720474090871836e-06, "loss": 17.8763, "step": 7317 }, { "epoch": 0.13376715959566418, "grad_norm": 5.970110473439147, "learning_rate": 9.720376495279111e-06, "loss": 17.3412, "step": 7318 }, { "epoch": 0.1337854387921107, "grad_norm": 7.229349135900173, "learning_rate": 9.720278883141876e-06, "loss": 17.9148, "step": 7319 }, { "epoch": 0.13380371798855722, "grad_norm": 8.226883619932043, "learning_rate": 9.720181254460473e-06, "loss": 18.4782, "step": 7320 }, { "epoch": 0.13382199718500376, "grad_norm": 7.727062975450835, "learning_rate": 9.720083609235244e-06, "loss": 18.0103, "step": 7321 }, { "epoch": 0.13384027638145027, "grad_norm": 7.473018375793038, "learning_rate": 9.719985947466532e-06, "loss": 17.8981, "step": 7322 }, { "epoch": 0.1338585555778968, "grad_norm": 6.779991513233267, "learning_rate": 9.719888269154679e-06, "loss": 17.7578, "step": 7323 }, { "epoch": 0.1338768347743433, "grad_norm": 8.484725122790097, "learning_rate": 9.719790574300026e-06, "loss": 18.3271, "step": 7324 }, { "epoch": 0.13389511397078985, "grad_norm": 6.646616396806981, "learning_rate": 9.719692862902919e-06, "loss": 17.3037, "step": 7325 }, { "epoch": 0.13391339316723636, "grad_norm": 7.235995780937685, "learning_rate": 9.719595134963694e-06, "loss": 17.7542, "step": 7326 }, { "epoch": 0.1339316723636829, "grad_norm": 7.072527221787121, "learning_rate": 9.719497390482701e-06, "loss": 17.8879, "step": 7327 }, { "epoch": 0.13394995156012943, "grad_norm": 6.8811664976307645, "learning_rate": 9.71939962946028e-06, "loss": 17.525, "step": 7328 }, { "epoch": 0.13396823075657593, "grad_norm": 8.695005723193574, "learning_rate": 9.71930185189677e-06, "loss": 18.2196, "step": 7329 }, { "epoch": 0.13398650995302247, "grad_norm": 6.1970851068351935, "learning_rate": 9.719204057792517e-06, "loss": 17.4303, "step": 7330 }, { "epoch": 0.13400478914946898, "grad_norm": 7.213331679947561, "learning_rate": 9.719106247147864e-06, "loss": 17.6202, "step": 7331 }, { "epoch": 0.1340230683459155, "grad_norm": 5.970498912276426, "learning_rate": 9.719008419963153e-06, "loss": 17.2458, "step": 7332 }, { "epoch": 0.13404134754236205, "grad_norm": 7.097097701811893, "learning_rate": 9.718910576238728e-06, "loss": 17.7279, "step": 7333 }, { "epoch": 0.13405962673880856, "grad_norm": 7.759600890157557, "learning_rate": 9.71881271597493e-06, "loss": 17.9428, "step": 7334 }, { "epoch": 0.1340779059352551, "grad_norm": 6.609218161336705, "learning_rate": 9.718714839172103e-06, "loss": 17.6602, "step": 7335 }, { "epoch": 0.1340961851317016, "grad_norm": 7.715512359303132, "learning_rate": 9.71861694583059e-06, "loss": 18.1546, "step": 7336 }, { "epoch": 0.13411446432814814, "grad_norm": 7.113875413837293, "learning_rate": 9.718519035950733e-06, "loss": 17.6451, "step": 7337 }, { "epoch": 0.13413274352459467, "grad_norm": 6.7710599191576515, "learning_rate": 9.718421109532879e-06, "loss": 17.474, "step": 7338 }, { "epoch": 0.13415102272104118, "grad_norm": 7.337391508280774, "learning_rate": 9.718323166577367e-06, "loss": 17.5065, "step": 7339 }, { "epoch": 0.13416930191748772, "grad_norm": 6.347988914894903, "learning_rate": 9.718225207084539e-06, "loss": 17.6424, "step": 7340 }, { "epoch": 0.13418758111393422, "grad_norm": 7.442010441490582, "learning_rate": 9.718127231054745e-06, "loss": 18.2821, "step": 7341 }, { "epoch": 0.13420586031038076, "grad_norm": 7.816204751329472, "learning_rate": 9.718029238488322e-06, "loss": 18.1054, "step": 7342 }, { "epoch": 0.13422413950682727, "grad_norm": 6.817636326102619, "learning_rate": 9.717931229385618e-06, "loss": 17.7242, "step": 7343 }, { "epoch": 0.1342424187032738, "grad_norm": 7.613804038438869, "learning_rate": 9.717833203746974e-06, "loss": 17.922, "step": 7344 }, { "epoch": 0.13426069789972034, "grad_norm": 6.112081337830192, "learning_rate": 9.717735161572732e-06, "loss": 17.2962, "step": 7345 }, { "epoch": 0.13427897709616685, "grad_norm": 6.61055652895237, "learning_rate": 9.71763710286324e-06, "loss": 17.5574, "step": 7346 }, { "epoch": 0.13429725629261338, "grad_norm": 7.126469762062783, "learning_rate": 9.717539027618837e-06, "loss": 17.8764, "step": 7347 }, { "epoch": 0.1343155354890599, "grad_norm": 6.825928999519179, "learning_rate": 9.71744093583987e-06, "loss": 17.5086, "step": 7348 }, { "epoch": 0.13433381468550643, "grad_norm": 6.6660706796394, "learning_rate": 9.717342827526684e-06, "loss": 17.8061, "step": 7349 }, { "epoch": 0.13435209388195296, "grad_norm": 9.300381817120178, "learning_rate": 9.717244702679618e-06, "loss": 17.9765, "step": 7350 }, { "epoch": 0.13437037307839947, "grad_norm": 6.728624399804422, "learning_rate": 9.71714656129902e-06, "loss": 17.5401, "step": 7351 }, { "epoch": 0.134388652274846, "grad_norm": 6.380027503238772, "learning_rate": 9.717048403385231e-06, "loss": 17.2511, "step": 7352 }, { "epoch": 0.1344069314712925, "grad_norm": 7.939909625296418, "learning_rate": 9.7169502289386e-06, "loss": 18.3813, "step": 7353 }, { "epoch": 0.13442521066773905, "grad_norm": 5.937995054653439, "learning_rate": 9.716852037959465e-06, "loss": 17.3262, "step": 7354 }, { "epoch": 0.13444348986418558, "grad_norm": 6.869378397025138, "learning_rate": 9.716753830448174e-06, "loss": 17.6853, "step": 7355 }, { "epoch": 0.1344617690606321, "grad_norm": 6.967266107514495, "learning_rate": 9.71665560640507e-06, "loss": 17.6118, "step": 7356 }, { "epoch": 0.13448004825707863, "grad_norm": 6.354453718530233, "learning_rate": 9.716557365830496e-06, "loss": 17.3243, "step": 7357 }, { "epoch": 0.13449832745352513, "grad_norm": 7.8805286975140545, "learning_rate": 9.716459108724799e-06, "loss": 18.221, "step": 7358 }, { "epoch": 0.13451660664997167, "grad_norm": 7.128020043174866, "learning_rate": 9.716360835088324e-06, "loss": 17.9753, "step": 7359 }, { "epoch": 0.13453488584641818, "grad_norm": 6.383961886304486, "learning_rate": 9.716262544921411e-06, "loss": 17.5263, "step": 7360 }, { "epoch": 0.13455316504286471, "grad_norm": 7.271209331479864, "learning_rate": 9.716164238224406e-06, "loss": 18.0944, "step": 7361 }, { "epoch": 0.13457144423931125, "grad_norm": 6.937910866916811, "learning_rate": 9.716065914997657e-06, "loss": 17.8308, "step": 7362 }, { "epoch": 0.13458972343575776, "grad_norm": 6.288875662832325, "learning_rate": 9.715967575241506e-06, "loss": 17.503, "step": 7363 }, { "epoch": 0.1346080026322043, "grad_norm": 7.058704016267175, "learning_rate": 9.715869218956297e-06, "loss": 17.6936, "step": 7364 }, { "epoch": 0.1346262818286508, "grad_norm": 6.839820264143985, "learning_rate": 9.715770846142376e-06, "loss": 17.9178, "step": 7365 }, { "epoch": 0.13464456102509734, "grad_norm": 11.633991662685553, "learning_rate": 9.715672456800087e-06, "loss": 19.238, "step": 7366 }, { "epoch": 0.13466284022154387, "grad_norm": 7.89246996117323, "learning_rate": 9.715574050929775e-06, "loss": 18.4244, "step": 7367 }, { "epoch": 0.13468111941799038, "grad_norm": 8.79725255643353, "learning_rate": 9.715475628531785e-06, "loss": 18.4606, "step": 7368 }, { "epoch": 0.13469939861443692, "grad_norm": 7.29585189884504, "learning_rate": 9.715377189606462e-06, "loss": 17.928, "step": 7369 }, { "epoch": 0.13471767781088342, "grad_norm": 7.865076076181476, "learning_rate": 9.715278734154155e-06, "loss": 17.7899, "step": 7370 }, { "epoch": 0.13473595700732996, "grad_norm": 6.006823076315517, "learning_rate": 9.715180262175202e-06, "loss": 17.1828, "step": 7371 }, { "epoch": 0.1347542362037765, "grad_norm": 6.490649690145237, "learning_rate": 9.715081773669949e-06, "loss": 17.432, "step": 7372 }, { "epoch": 0.134772515400223, "grad_norm": 8.093132151465447, "learning_rate": 9.714983268638747e-06, "loss": 18.1537, "step": 7373 }, { "epoch": 0.13479079459666954, "grad_norm": 7.385779412925701, "learning_rate": 9.714884747081937e-06, "loss": 18.2123, "step": 7374 }, { "epoch": 0.13480907379311605, "grad_norm": 6.923081296403451, "learning_rate": 9.714786208999864e-06, "loss": 17.7324, "step": 7375 }, { "epoch": 0.13482735298956258, "grad_norm": 8.347399195705641, "learning_rate": 9.714687654392876e-06, "loss": 18.5434, "step": 7376 }, { "epoch": 0.1348456321860091, "grad_norm": 6.52387836191315, "learning_rate": 9.714589083261316e-06, "loss": 17.4424, "step": 7377 }, { "epoch": 0.13486391138245563, "grad_norm": 7.227908509359091, "learning_rate": 9.714490495605531e-06, "loss": 17.6268, "step": 7378 }, { "epoch": 0.13488219057890216, "grad_norm": 7.676719661969092, "learning_rate": 9.714391891425866e-06, "loss": 17.5231, "step": 7379 }, { "epoch": 0.13490046977534867, "grad_norm": 7.507731889119987, "learning_rate": 9.714293270722665e-06, "loss": 17.7889, "step": 7380 }, { "epoch": 0.1349187489717952, "grad_norm": 7.489507476864272, "learning_rate": 9.714194633496276e-06, "loss": 17.8013, "step": 7381 }, { "epoch": 0.1349370281682417, "grad_norm": 7.1894071929284715, "learning_rate": 9.714095979747044e-06, "loss": 17.9802, "step": 7382 }, { "epoch": 0.13495530736468825, "grad_norm": 11.852791486406433, "learning_rate": 9.713997309475316e-06, "loss": 17.1034, "step": 7383 }, { "epoch": 0.13497358656113478, "grad_norm": 8.161722880080935, "learning_rate": 9.713898622681436e-06, "loss": 18.4728, "step": 7384 }, { "epoch": 0.1349918657575813, "grad_norm": 7.143327922896236, "learning_rate": 9.71379991936575e-06, "loss": 17.7502, "step": 7385 }, { "epoch": 0.13501014495402783, "grad_norm": 5.735791390923804, "learning_rate": 9.713701199528602e-06, "loss": 17.2922, "step": 7386 }, { "epoch": 0.13502842415047434, "grad_norm": 6.952909023256605, "learning_rate": 9.713602463170345e-06, "loss": 17.6708, "step": 7387 }, { "epoch": 0.13504670334692087, "grad_norm": 7.659718136904871, "learning_rate": 9.713503710291317e-06, "loss": 17.9768, "step": 7388 }, { "epoch": 0.1350649825433674, "grad_norm": 7.496735274558631, "learning_rate": 9.713404940891867e-06, "loss": 17.7919, "step": 7389 }, { "epoch": 0.13508326173981391, "grad_norm": 7.251590006579304, "learning_rate": 9.713306154972344e-06, "loss": 17.8879, "step": 7390 }, { "epoch": 0.13510154093626045, "grad_norm": 8.674042320462984, "learning_rate": 9.71320735253309e-06, "loss": 18.4063, "step": 7391 }, { "epoch": 0.13511982013270696, "grad_norm": 8.016817300470889, "learning_rate": 9.713108533574455e-06, "loss": 18.3258, "step": 7392 }, { "epoch": 0.1351380993291535, "grad_norm": 8.244659610467385, "learning_rate": 9.713009698096782e-06, "loss": 18.2969, "step": 7393 }, { "epoch": 0.1351563785256, "grad_norm": 6.014535679292738, "learning_rate": 9.71291084610042e-06, "loss": 17.2641, "step": 7394 }, { "epoch": 0.13517465772204654, "grad_norm": 6.153890770683312, "learning_rate": 9.712811977585715e-06, "loss": 17.3391, "step": 7395 }, { "epoch": 0.13519293691849307, "grad_norm": 7.288188074912993, "learning_rate": 9.712713092553012e-06, "loss": 18.0131, "step": 7396 }, { "epoch": 0.13521121611493958, "grad_norm": 6.553780700162622, "learning_rate": 9.712614191002657e-06, "loss": 17.7899, "step": 7397 }, { "epoch": 0.13522949531138612, "grad_norm": 6.867287067987206, "learning_rate": 9.712515272935e-06, "loss": 17.8165, "step": 7398 }, { "epoch": 0.13524777450783262, "grad_norm": 6.399640444184357, "learning_rate": 9.712416338350386e-06, "loss": 17.4553, "step": 7399 }, { "epoch": 0.13526605370427916, "grad_norm": 8.089816224610908, "learning_rate": 9.712317387249162e-06, "loss": 17.7951, "step": 7400 }, { "epoch": 0.1352843329007257, "grad_norm": 7.140421054078072, "learning_rate": 9.712218419631673e-06, "loss": 17.9161, "step": 7401 }, { "epoch": 0.1353026120971722, "grad_norm": 6.5962874189656855, "learning_rate": 9.712119435498268e-06, "loss": 17.5488, "step": 7402 }, { "epoch": 0.13532089129361874, "grad_norm": 6.718803772161681, "learning_rate": 9.712020434849294e-06, "loss": 17.8015, "step": 7403 }, { "epoch": 0.13533917049006525, "grad_norm": 7.2415029216503575, "learning_rate": 9.711921417685097e-06, "loss": 18.0733, "step": 7404 }, { "epoch": 0.13535744968651178, "grad_norm": 7.816229348832906, "learning_rate": 9.711822384006025e-06, "loss": 17.9967, "step": 7405 }, { "epoch": 0.13537572888295832, "grad_norm": 7.116408758357093, "learning_rate": 9.711723333812422e-06, "loss": 17.8816, "step": 7406 }, { "epoch": 0.13539400807940483, "grad_norm": 5.932070533501284, "learning_rate": 9.71162426710464e-06, "loss": 17.273, "step": 7407 }, { "epoch": 0.13541228727585136, "grad_norm": 6.6459074293993, "learning_rate": 9.711525183883021e-06, "loss": 17.7613, "step": 7408 }, { "epoch": 0.13543056647229787, "grad_norm": 6.422762667907978, "learning_rate": 9.711426084147918e-06, "loss": 17.4116, "step": 7409 }, { "epoch": 0.1354488456687444, "grad_norm": 6.265882057739489, "learning_rate": 9.711326967899674e-06, "loss": 17.65, "step": 7410 }, { "epoch": 0.1354671248651909, "grad_norm": 8.118062402360124, "learning_rate": 9.71122783513864e-06, "loss": 18.3274, "step": 7411 }, { "epoch": 0.13548540406163745, "grad_norm": 6.59997902432031, "learning_rate": 9.711128685865158e-06, "loss": 17.4206, "step": 7412 }, { "epoch": 0.13550368325808398, "grad_norm": 6.698676748752259, "learning_rate": 9.71102952007958e-06, "loss": 17.6831, "step": 7413 }, { "epoch": 0.1355219624545305, "grad_norm": 7.462208115105021, "learning_rate": 9.710930337782254e-06, "loss": 17.9643, "step": 7414 }, { "epoch": 0.13554024165097703, "grad_norm": 6.881641144148816, "learning_rate": 9.710831138973524e-06, "loss": 17.6377, "step": 7415 }, { "epoch": 0.13555852084742354, "grad_norm": 6.74850301229193, "learning_rate": 9.71073192365374e-06, "loss": 17.9104, "step": 7416 }, { "epoch": 0.13557680004387007, "grad_norm": 7.372550253170825, "learning_rate": 9.710632691823249e-06, "loss": 17.9916, "step": 7417 }, { "epoch": 0.1355950792403166, "grad_norm": 8.937901853678206, "learning_rate": 9.710533443482399e-06, "loss": 18.3555, "step": 7418 }, { "epoch": 0.13561335843676312, "grad_norm": 6.817766288220093, "learning_rate": 9.71043417863154e-06, "loss": 17.8772, "step": 7419 }, { "epoch": 0.13563163763320965, "grad_norm": 7.757249117581919, "learning_rate": 9.710334897271016e-06, "loss": 18.1575, "step": 7420 }, { "epoch": 0.13564991682965616, "grad_norm": 6.063796383370273, "learning_rate": 9.71023559940118e-06, "loss": 17.3542, "step": 7421 }, { "epoch": 0.1356681960261027, "grad_norm": 7.686602160021942, "learning_rate": 9.710136285022374e-06, "loss": 18.175, "step": 7422 }, { "epoch": 0.13568647522254923, "grad_norm": 8.243732315581704, "learning_rate": 9.710036954134948e-06, "loss": 18.5862, "step": 7423 }, { "epoch": 0.13570475441899574, "grad_norm": 7.6194402466320055, "learning_rate": 9.709937606739252e-06, "loss": 18.1703, "step": 7424 }, { "epoch": 0.13572303361544227, "grad_norm": 6.469208520653512, "learning_rate": 9.709838242835635e-06, "loss": 17.5793, "step": 7425 }, { "epoch": 0.13574131281188878, "grad_norm": 6.219589586439911, "learning_rate": 9.709738862424442e-06, "loss": 17.587, "step": 7426 }, { "epoch": 0.13575959200833532, "grad_norm": 8.22034380512448, "learning_rate": 9.709639465506026e-06, "loss": 17.7433, "step": 7427 }, { "epoch": 0.13577787120478182, "grad_norm": 6.614378456818435, "learning_rate": 9.70954005208073e-06, "loss": 17.4989, "step": 7428 }, { "epoch": 0.13579615040122836, "grad_norm": 6.906101381445755, "learning_rate": 9.709440622148905e-06, "loss": 17.7696, "step": 7429 }, { "epoch": 0.1358144295976749, "grad_norm": 6.914231304372284, "learning_rate": 9.709341175710899e-06, "loss": 17.7633, "step": 7430 }, { "epoch": 0.1358327087941214, "grad_norm": 7.924595493657493, "learning_rate": 9.709241712767062e-06, "loss": 18.3542, "step": 7431 }, { "epoch": 0.13585098799056794, "grad_norm": 7.733756354225041, "learning_rate": 9.709142233317739e-06, "loss": 17.9405, "step": 7432 }, { "epoch": 0.13586926718701445, "grad_norm": 8.678824703147397, "learning_rate": 9.709042737363283e-06, "loss": 18.6903, "step": 7433 }, { "epoch": 0.13588754638346098, "grad_norm": 8.161165981960721, "learning_rate": 9.708943224904041e-06, "loss": 18.1929, "step": 7434 }, { "epoch": 0.13590582557990752, "grad_norm": 5.921200411436585, "learning_rate": 9.70884369594036e-06, "loss": 17.2303, "step": 7435 }, { "epoch": 0.13592410477635403, "grad_norm": 7.3493418588230375, "learning_rate": 9.708744150472594e-06, "loss": 17.6725, "step": 7436 }, { "epoch": 0.13594238397280056, "grad_norm": 8.713112108397668, "learning_rate": 9.708644588501084e-06, "loss": 18.2403, "step": 7437 }, { "epoch": 0.13596066316924707, "grad_norm": 8.260071257944007, "learning_rate": 9.708545010026187e-06, "loss": 18.6511, "step": 7438 }, { "epoch": 0.1359789423656936, "grad_norm": 7.311836953155976, "learning_rate": 9.708445415048245e-06, "loss": 17.7767, "step": 7439 }, { "epoch": 0.13599722156214014, "grad_norm": 8.075841208882839, "learning_rate": 9.708345803567612e-06, "loss": 18.1745, "step": 7440 }, { "epoch": 0.13601550075858665, "grad_norm": 6.461272470575603, "learning_rate": 9.708246175584637e-06, "loss": 17.6392, "step": 7441 }, { "epoch": 0.13603377995503318, "grad_norm": 6.3777355538496545, "learning_rate": 9.708146531099665e-06, "loss": 17.4888, "step": 7442 }, { "epoch": 0.1360520591514797, "grad_norm": 6.305521191061348, "learning_rate": 9.70804687011305e-06, "loss": 17.4775, "step": 7443 }, { "epoch": 0.13607033834792623, "grad_norm": 9.468671047870306, "learning_rate": 9.707947192625137e-06, "loss": 18.7227, "step": 7444 }, { "epoch": 0.13608861754437274, "grad_norm": 7.909827586776542, "learning_rate": 9.70784749863628e-06, "loss": 18.0857, "step": 7445 }, { "epoch": 0.13610689674081927, "grad_norm": 6.3380149814229085, "learning_rate": 9.707747788146826e-06, "loss": 17.3367, "step": 7446 }, { "epoch": 0.1361251759372658, "grad_norm": 7.207068106258375, "learning_rate": 9.707648061157124e-06, "loss": 18.0903, "step": 7447 }, { "epoch": 0.13614345513371232, "grad_norm": 9.115628758208079, "learning_rate": 9.707548317667523e-06, "loss": 18.3233, "step": 7448 }, { "epoch": 0.13616173433015885, "grad_norm": 7.762250566589427, "learning_rate": 9.707448557678374e-06, "loss": 17.9831, "step": 7449 }, { "epoch": 0.13618001352660536, "grad_norm": 7.74005275425354, "learning_rate": 9.707348781190028e-06, "loss": 18.019, "step": 7450 }, { "epoch": 0.1361982927230519, "grad_norm": 6.637406948041515, "learning_rate": 9.707248988202832e-06, "loss": 17.4043, "step": 7451 }, { "epoch": 0.13621657191949843, "grad_norm": 6.835654410013408, "learning_rate": 9.707149178717136e-06, "loss": 17.9024, "step": 7452 }, { "epoch": 0.13623485111594494, "grad_norm": 6.473983229279323, "learning_rate": 9.70704935273329e-06, "loss": 17.3907, "step": 7453 }, { "epoch": 0.13625313031239147, "grad_norm": 7.516646582083344, "learning_rate": 9.706949510251647e-06, "loss": 18.1707, "step": 7454 }, { "epoch": 0.13627140950883798, "grad_norm": 9.462754966995899, "learning_rate": 9.706849651272551e-06, "loss": 18.5969, "step": 7455 }, { "epoch": 0.13628968870528452, "grad_norm": 7.452679335354314, "learning_rate": 9.706749775796359e-06, "loss": 18.1997, "step": 7456 }, { "epoch": 0.13630796790173105, "grad_norm": 8.791480284434773, "learning_rate": 9.706649883823415e-06, "loss": 18.5841, "step": 7457 }, { "epoch": 0.13632624709817756, "grad_norm": 7.897907052793343, "learning_rate": 9.706549975354073e-06, "loss": 18.1403, "step": 7458 }, { "epoch": 0.1363445262946241, "grad_norm": 8.894721361673117, "learning_rate": 9.70645005038868e-06, "loss": 18.3676, "step": 7459 }, { "epoch": 0.1363628054910706, "grad_norm": 6.520883671534357, "learning_rate": 9.70635010892759e-06, "loss": 17.631, "step": 7460 }, { "epoch": 0.13638108468751714, "grad_norm": 7.071421314080508, "learning_rate": 9.70625015097115e-06, "loss": 17.8233, "step": 7461 }, { "epoch": 0.13639936388396365, "grad_norm": 5.6300301550366925, "learning_rate": 9.706150176519713e-06, "loss": 17.1715, "step": 7462 }, { "epoch": 0.13641764308041018, "grad_norm": 7.852800304353618, "learning_rate": 9.706050185573626e-06, "loss": 17.9435, "step": 7463 }, { "epoch": 0.13643592227685672, "grad_norm": 6.699257770943432, "learning_rate": 9.705950178133243e-06, "loss": 17.821, "step": 7464 }, { "epoch": 0.13645420147330323, "grad_norm": 6.588299143131586, "learning_rate": 9.705850154198912e-06, "loss": 17.5653, "step": 7465 }, { "epoch": 0.13647248066974976, "grad_norm": 6.66726651315699, "learning_rate": 9.705750113770986e-06, "loss": 17.6059, "step": 7466 }, { "epoch": 0.13649075986619627, "grad_norm": 6.233600401310327, "learning_rate": 9.705650056849813e-06, "loss": 17.589, "step": 7467 }, { "epoch": 0.1365090390626428, "grad_norm": 6.968923947025399, "learning_rate": 9.705549983435744e-06, "loss": 17.9795, "step": 7468 }, { "epoch": 0.13652731825908934, "grad_norm": 7.938093167827697, "learning_rate": 9.705449893529133e-06, "loss": 17.9217, "step": 7469 }, { "epoch": 0.13654559745553585, "grad_norm": 6.998871223151411, "learning_rate": 9.705349787130327e-06, "loss": 17.6893, "step": 7470 }, { "epoch": 0.13656387665198239, "grad_norm": 7.820470612951028, "learning_rate": 9.70524966423968e-06, "loss": 18.362, "step": 7471 }, { "epoch": 0.1365821558484289, "grad_norm": 7.775147417972436, "learning_rate": 9.705149524857539e-06, "loss": 18.1892, "step": 7472 }, { "epoch": 0.13660043504487543, "grad_norm": 6.673182990397212, "learning_rate": 9.705049368984259e-06, "loss": 17.817, "step": 7473 }, { "epoch": 0.13661871424132196, "grad_norm": 5.406971223600896, "learning_rate": 9.704949196620188e-06, "loss": 17.0513, "step": 7474 }, { "epoch": 0.13663699343776847, "grad_norm": 6.563913203413318, "learning_rate": 9.704849007765677e-06, "loss": 17.3962, "step": 7475 }, { "epoch": 0.136655272634215, "grad_norm": 7.338919945878583, "learning_rate": 9.70474880242108e-06, "loss": 17.6328, "step": 7476 }, { "epoch": 0.13667355183066152, "grad_norm": 7.261563972332712, "learning_rate": 9.704648580586748e-06, "loss": 17.7875, "step": 7477 }, { "epoch": 0.13669183102710805, "grad_norm": 8.396518761867243, "learning_rate": 9.704548342263029e-06, "loss": 18.0135, "step": 7478 }, { "epoch": 0.13671011022355456, "grad_norm": 6.422952971004934, "learning_rate": 9.704448087450278e-06, "loss": 17.7736, "step": 7479 }, { "epoch": 0.1367283894200011, "grad_norm": 6.094479934565342, "learning_rate": 9.704347816148842e-06, "loss": 17.4413, "step": 7480 }, { "epoch": 0.13674666861644763, "grad_norm": 8.832583222945484, "learning_rate": 9.704247528359079e-06, "loss": 18.0457, "step": 7481 }, { "epoch": 0.13676494781289414, "grad_norm": 6.953333687060656, "learning_rate": 9.704147224081332e-06, "loss": 17.827, "step": 7482 }, { "epoch": 0.13678322700934067, "grad_norm": 6.900389147758031, "learning_rate": 9.70404690331596e-06, "loss": 17.7573, "step": 7483 }, { "epoch": 0.13680150620578718, "grad_norm": 6.581673943697085, "learning_rate": 9.703946566063314e-06, "loss": 17.6126, "step": 7484 }, { "epoch": 0.13681978540223372, "grad_norm": 6.340699010437135, "learning_rate": 9.703846212323739e-06, "loss": 17.4173, "step": 7485 }, { "epoch": 0.13683806459868025, "grad_norm": 6.756441239325302, "learning_rate": 9.703745842097594e-06, "loss": 17.5447, "step": 7486 }, { "epoch": 0.13685634379512676, "grad_norm": 7.340464170563029, "learning_rate": 9.703645455385227e-06, "loss": 18.3228, "step": 7487 }, { "epoch": 0.1368746229915733, "grad_norm": 6.415627907213983, "learning_rate": 9.703545052186992e-06, "loss": 17.6938, "step": 7488 }, { "epoch": 0.1368929021880198, "grad_norm": 7.493575468964602, "learning_rate": 9.703444632503239e-06, "loss": 17.7694, "step": 7489 }, { "epoch": 0.13691118138446634, "grad_norm": 5.726018751368653, "learning_rate": 9.703344196334319e-06, "loss": 17.1624, "step": 7490 }, { "epoch": 0.13692946058091288, "grad_norm": 6.382580616173418, "learning_rate": 9.703243743680589e-06, "loss": 17.5575, "step": 7491 }, { "epoch": 0.13694773977735938, "grad_norm": 8.02022727836731, "learning_rate": 9.703143274542395e-06, "loss": 18.0148, "step": 7492 }, { "epoch": 0.13696601897380592, "grad_norm": 7.796984691625947, "learning_rate": 9.703042788920094e-06, "loss": 18.2498, "step": 7493 }, { "epoch": 0.13698429817025243, "grad_norm": 6.847058154079785, "learning_rate": 9.702942286814034e-06, "loss": 17.5306, "step": 7494 }, { "epoch": 0.13700257736669896, "grad_norm": 6.693193351252166, "learning_rate": 9.70284176822457e-06, "loss": 17.6616, "step": 7495 }, { "epoch": 0.13702085656314547, "grad_norm": 6.468462283579735, "learning_rate": 9.702741233152055e-06, "loss": 17.6046, "step": 7496 }, { "epoch": 0.137039135759592, "grad_norm": 8.447953586046685, "learning_rate": 9.702640681596839e-06, "loss": 18.14, "step": 7497 }, { "epoch": 0.13705741495603854, "grad_norm": 6.7410512609657784, "learning_rate": 9.702540113559276e-06, "loss": 17.4294, "step": 7498 }, { "epoch": 0.13707569415248505, "grad_norm": 7.260428605576361, "learning_rate": 9.702439529039718e-06, "loss": 18.0614, "step": 7499 }, { "epoch": 0.13709397334893159, "grad_norm": 7.358399236749688, "learning_rate": 9.702338928038517e-06, "loss": 18.1374, "step": 7500 }, { "epoch": 0.1371122525453781, "grad_norm": 6.405876792649341, "learning_rate": 9.702238310556027e-06, "loss": 17.6907, "step": 7501 }, { "epoch": 0.13713053174182463, "grad_norm": 7.4062837846026985, "learning_rate": 9.702137676592598e-06, "loss": 17.8288, "step": 7502 }, { "epoch": 0.13714881093827117, "grad_norm": 6.830382935003411, "learning_rate": 9.702037026148586e-06, "loss": 17.947, "step": 7503 }, { "epoch": 0.13716709013471767, "grad_norm": 7.770829752486227, "learning_rate": 9.701936359224341e-06, "loss": 18.0785, "step": 7504 }, { "epoch": 0.1371853693311642, "grad_norm": 7.660208673592639, "learning_rate": 9.701835675820218e-06, "loss": 18.3223, "step": 7505 }, { "epoch": 0.13720364852761072, "grad_norm": 7.583835662282823, "learning_rate": 9.701734975936568e-06, "loss": 18.0849, "step": 7506 }, { "epoch": 0.13722192772405725, "grad_norm": 6.956978730707371, "learning_rate": 9.701634259573747e-06, "loss": 17.9131, "step": 7507 }, { "epoch": 0.1372402069205038, "grad_norm": 7.649369274987982, "learning_rate": 9.701533526732104e-06, "loss": 17.9172, "step": 7508 }, { "epoch": 0.1372584861169503, "grad_norm": 6.578214212991516, "learning_rate": 9.701432777411995e-06, "loss": 17.5643, "step": 7509 }, { "epoch": 0.13727676531339683, "grad_norm": 5.534817711350081, "learning_rate": 9.701332011613771e-06, "loss": 17.0359, "step": 7510 }, { "epoch": 0.13729504450984334, "grad_norm": 6.667518505261539, "learning_rate": 9.701231229337788e-06, "loss": 17.6601, "step": 7511 }, { "epoch": 0.13731332370628987, "grad_norm": 8.121299665199137, "learning_rate": 9.701130430584396e-06, "loss": 17.7347, "step": 7512 }, { "epoch": 0.13733160290273638, "grad_norm": 7.05480122411721, "learning_rate": 9.701029615353949e-06, "loss": 17.9691, "step": 7513 }, { "epoch": 0.13734988209918292, "grad_norm": 6.894109265636699, "learning_rate": 9.700928783646804e-06, "loss": 17.7868, "step": 7514 }, { "epoch": 0.13736816129562945, "grad_norm": 5.633288824085069, "learning_rate": 9.70082793546331e-06, "loss": 17.3854, "step": 7515 }, { "epoch": 0.13738644049207596, "grad_norm": 6.582019190776378, "learning_rate": 9.700727070803822e-06, "loss": 17.5675, "step": 7516 }, { "epoch": 0.1374047196885225, "grad_norm": 7.911740293895738, "learning_rate": 9.700626189668694e-06, "loss": 17.9536, "step": 7517 }, { "epoch": 0.137422998884969, "grad_norm": 7.304427641569884, "learning_rate": 9.700525292058278e-06, "loss": 18.1302, "step": 7518 }, { "epoch": 0.13744127808141554, "grad_norm": 6.930641441163955, "learning_rate": 9.700424377972928e-06, "loss": 17.8615, "step": 7519 }, { "epoch": 0.13745955727786208, "grad_norm": 7.1536806801317825, "learning_rate": 9.700323447413e-06, "loss": 18.0929, "step": 7520 }, { "epoch": 0.13747783647430858, "grad_norm": 6.965101634971381, "learning_rate": 9.700222500378846e-06, "loss": 17.8401, "step": 7521 }, { "epoch": 0.13749611567075512, "grad_norm": 8.294417327476186, "learning_rate": 9.700121536870822e-06, "loss": 18.6985, "step": 7522 }, { "epoch": 0.13751439486720163, "grad_norm": 6.73186924609752, "learning_rate": 9.700020556889275e-06, "loss": 17.4993, "step": 7523 }, { "epoch": 0.13753267406364816, "grad_norm": 6.773047454341339, "learning_rate": 9.699919560434568e-06, "loss": 17.4125, "step": 7524 }, { "epoch": 0.1375509532600947, "grad_norm": 7.670128620195902, "learning_rate": 9.69981854750705e-06, "loss": 18.2021, "step": 7525 }, { "epoch": 0.1375692324565412, "grad_norm": 6.6562117075155856, "learning_rate": 9.699717518107075e-06, "loss": 17.542, "step": 7526 }, { "epoch": 0.13758751165298774, "grad_norm": 7.134487262285221, "learning_rate": 9.699616472234998e-06, "loss": 17.796, "step": 7527 }, { "epoch": 0.13760579084943425, "grad_norm": 8.67954375693659, "learning_rate": 9.699515409891173e-06, "loss": 19.0484, "step": 7528 }, { "epoch": 0.1376240700458808, "grad_norm": 6.161607217740553, "learning_rate": 9.699414331075955e-06, "loss": 17.453, "step": 7529 }, { "epoch": 0.1376423492423273, "grad_norm": 6.842734209054109, "learning_rate": 9.699313235789698e-06, "loss": 17.7173, "step": 7530 }, { "epoch": 0.13766062843877383, "grad_norm": 6.833805176580314, "learning_rate": 9.699212124032754e-06, "loss": 17.4478, "step": 7531 }, { "epoch": 0.13767890763522037, "grad_norm": 7.496615619801857, "learning_rate": 9.699110995805481e-06, "loss": 18.0135, "step": 7532 }, { "epoch": 0.13769718683166687, "grad_norm": 7.622864562921994, "learning_rate": 9.69900985110823e-06, "loss": 18.4149, "step": 7533 }, { "epoch": 0.1377154660281134, "grad_norm": 7.754625756622168, "learning_rate": 9.698908689941358e-06, "loss": 18.1251, "step": 7534 }, { "epoch": 0.13773374522455992, "grad_norm": 6.3318055787919, "learning_rate": 9.69880751230522e-06, "loss": 17.5096, "step": 7535 }, { "epoch": 0.13775202442100645, "grad_norm": 6.6294924082068, "learning_rate": 9.698706318200169e-06, "loss": 17.6423, "step": 7536 }, { "epoch": 0.137770303617453, "grad_norm": 6.916513568451425, "learning_rate": 9.698605107626559e-06, "loss": 17.878, "step": 7537 }, { "epoch": 0.1377885828138995, "grad_norm": 6.199471801380165, "learning_rate": 9.698503880584746e-06, "loss": 17.4741, "step": 7538 }, { "epoch": 0.13780686201034603, "grad_norm": 7.6927948140504325, "learning_rate": 9.698402637075085e-06, "loss": 18.1918, "step": 7539 }, { "epoch": 0.13782514120679254, "grad_norm": 7.90499299859101, "learning_rate": 9.698301377097929e-06, "loss": 17.8412, "step": 7540 }, { "epoch": 0.13784342040323908, "grad_norm": 6.846420909602271, "learning_rate": 9.698200100653636e-06, "loss": 17.5772, "step": 7541 }, { "epoch": 0.1378616995996856, "grad_norm": 6.531231633745436, "learning_rate": 9.698098807742559e-06, "loss": 17.613, "step": 7542 }, { "epoch": 0.13787997879613212, "grad_norm": 6.329550184621328, "learning_rate": 9.697997498365054e-06, "loss": 17.4516, "step": 7543 }, { "epoch": 0.13789825799257865, "grad_norm": 6.818289281580703, "learning_rate": 9.697896172521475e-06, "loss": 17.9329, "step": 7544 }, { "epoch": 0.13791653718902516, "grad_norm": 6.314645548487325, "learning_rate": 9.697794830212178e-06, "loss": 17.4605, "step": 7545 }, { "epoch": 0.1379348163854717, "grad_norm": 5.9485092370941395, "learning_rate": 9.697693471437516e-06, "loss": 17.2601, "step": 7546 }, { "epoch": 0.1379530955819182, "grad_norm": 6.979378822346856, "learning_rate": 9.697592096197849e-06, "loss": 17.9736, "step": 7547 }, { "epoch": 0.13797137477836474, "grad_norm": 6.219796983006217, "learning_rate": 9.697490704493527e-06, "loss": 17.4588, "step": 7548 }, { "epoch": 0.13798965397481128, "grad_norm": 7.0799748577732355, "learning_rate": 9.697389296324908e-06, "loss": 18.1643, "step": 7549 }, { "epoch": 0.13800793317125779, "grad_norm": 7.552326328445858, "learning_rate": 9.697287871692349e-06, "loss": 17.6755, "step": 7550 }, { "epoch": 0.13802621236770432, "grad_norm": 6.084133107890047, "learning_rate": 9.697186430596201e-06, "loss": 17.4025, "step": 7551 }, { "epoch": 0.13804449156415083, "grad_norm": 7.4388834971540225, "learning_rate": 9.697084973036823e-06, "loss": 17.9366, "step": 7552 }, { "epoch": 0.13806277076059736, "grad_norm": 5.488343762675065, "learning_rate": 9.696983499014572e-06, "loss": 17.1962, "step": 7553 }, { "epoch": 0.1380810499570439, "grad_norm": 6.007124088766023, "learning_rate": 9.696882008529797e-06, "loss": 17.4313, "step": 7554 }, { "epoch": 0.1380993291534904, "grad_norm": 8.488478544872322, "learning_rate": 9.696780501582862e-06, "loss": 18.1383, "step": 7555 }, { "epoch": 0.13811760834993694, "grad_norm": 5.140191295738998, "learning_rate": 9.696678978174118e-06, "loss": 17.0095, "step": 7556 }, { "epoch": 0.13813588754638345, "grad_norm": 7.613492131171581, "learning_rate": 9.69657743830392e-06, "loss": 18.0298, "step": 7557 }, { "epoch": 0.13815416674283, "grad_norm": 9.479618418224536, "learning_rate": 9.696475881972627e-06, "loss": 18.2041, "step": 7558 }, { "epoch": 0.13817244593927652, "grad_norm": 7.058720172453753, "learning_rate": 9.696374309180593e-06, "loss": 17.9852, "step": 7559 }, { "epoch": 0.13819072513572303, "grad_norm": 6.269282928623162, "learning_rate": 9.696272719928177e-06, "loss": 17.4109, "step": 7560 }, { "epoch": 0.13820900433216957, "grad_norm": 7.357859970941546, "learning_rate": 9.69617111421573e-06, "loss": 18.0383, "step": 7561 }, { "epoch": 0.13822728352861607, "grad_norm": 7.68302082306943, "learning_rate": 9.696069492043611e-06, "loss": 18.0667, "step": 7562 }, { "epoch": 0.1382455627250626, "grad_norm": 6.386505217125653, "learning_rate": 9.695967853412177e-06, "loss": 17.5746, "step": 7563 }, { "epoch": 0.13826384192150912, "grad_norm": 6.0812781948558365, "learning_rate": 9.695866198321782e-06, "loss": 17.4049, "step": 7564 }, { "epoch": 0.13828212111795565, "grad_norm": 7.885285814407381, "learning_rate": 9.695764526772784e-06, "loss": 18.2242, "step": 7565 }, { "epoch": 0.1383004003144022, "grad_norm": 8.503405783523597, "learning_rate": 9.69566283876554e-06, "loss": 18.4756, "step": 7566 }, { "epoch": 0.1383186795108487, "grad_norm": 7.325141136986131, "learning_rate": 9.695561134300403e-06, "loss": 17.6815, "step": 7567 }, { "epoch": 0.13833695870729523, "grad_norm": 6.819358139332634, "learning_rate": 9.695459413377732e-06, "loss": 17.6192, "step": 7568 }, { "epoch": 0.13835523790374174, "grad_norm": 7.196714241431053, "learning_rate": 9.695357675997886e-06, "loss": 17.8749, "step": 7569 }, { "epoch": 0.13837351710018828, "grad_norm": 7.124930042216272, "learning_rate": 9.695255922161216e-06, "loss": 17.8248, "step": 7570 }, { "epoch": 0.1383917962966348, "grad_norm": 7.577439664001599, "learning_rate": 9.695154151868082e-06, "loss": 18.048, "step": 7571 }, { "epoch": 0.13841007549308132, "grad_norm": 5.946709674463609, "learning_rate": 9.69505236511884e-06, "loss": 17.3948, "step": 7572 }, { "epoch": 0.13842835468952785, "grad_norm": 6.849109850690806, "learning_rate": 9.694950561913847e-06, "loss": 17.8114, "step": 7573 }, { "epoch": 0.13844663388597436, "grad_norm": 6.254112894892067, "learning_rate": 9.69484874225346e-06, "loss": 17.3582, "step": 7574 }, { "epoch": 0.1384649130824209, "grad_norm": 6.83043995617411, "learning_rate": 9.694746906138037e-06, "loss": 17.7437, "step": 7575 }, { "epoch": 0.13848319227886743, "grad_norm": 6.8425114925436485, "learning_rate": 9.69464505356793e-06, "loss": 17.5462, "step": 7576 }, { "epoch": 0.13850147147531394, "grad_norm": 8.338983089259113, "learning_rate": 9.694543184543503e-06, "loss": 17.8099, "step": 7577 }, { "epoch": 0.13851975067176048, "grad_norm": 8.441731655404608, "learning_rate": 9.694441299065108e-06, "loss": 18.0928, "step": 7578 }, { "epoch": 0.13853802986820699, "grad_norm": 7.39996527709027, "learning_rate": 9.694339397133103e-06, "loss": 18.0476, "step": 7579 }, { "epoch": 0.13855630906465352, "grad_norm": 6.481565025506474, "learning_rate": 9.694237478747845e-06, "loss": 17.4842, "step": 7580 }, { "epoch": 0.13857458826110003, "grad_norm": 7.5816203621605744, "learning_rate": 9.694135543909695e-06, "loss": 18.1815, "step": 7581 }, { "epoch": 0.13859286745754656, "grad_norm": 6.556496899939846, "learning_rate": 9.694033592619005e-06, "loss": 17.7246, "step": 7582 }, { "epoch": 0.1386111466539931, "grad_norm": 9.199344677937912, "learning_rate": 9.693931624876134e-06, "loss": 19.1485, "step": 7583 }, { "epoch": 0.1386294258504396, "grad_norm": 8.306551459532058, "learning_rate": 9.693829640681443e-06, "loss": 17.9562, "step": 7584 }, { "epoch": 0.13864770504688614, "grad_norm": 7.317573061319512, "learning_rate": 9.693727640035284e-06, "loss": 18.0196, "step": 7585 }, { "epoch": 0.13866598424333265, "grad_norm": 6.088249381911165, "learning_rate": 9.693625622938016e-06, "loss": 17.2675, "step": 7586 }, { "epoch": 0.1386842634397792, "grad_norm": 8.32568238400912, "learning_rate": 9.69352358939e-06, "loss": 18.5118, "step": 7587 }, { "epoch": 0.13870254263622572, "grad_norm": 6.881622182942428, "learning_rate": 9.69342153939159e-06, "loss": 17.7963, "step": 7588 }, { "epoch": 0.13872082183267223, "grad_norm": 7.233722115198136, "learning_rate": 9.693319472943144e-06, "loss": 18.0334, "step": 7589 }, { "epoch": 0.13873910102911877, "grad_norm": 6.933665393243024, "learning_rate": 9.693217390045022e-06, "loss": 17.3835, "step": 7590 }, { "epoch": 0.13875738022556527, "grad_norm": 6.877804478341359, "learning_rate": 9.693115290697579e-06, "loss": 17.7326, "step": 7591 }, { "epoch": 0.1387756594220118, "grad_norm": 6.088325576611347, "learning_rate": 9.693013174901176e-06, "loss": 17.442, "step": 7592 }, { "epoch": 0.13879393861845835, "grad_norm": 5.491087823667131, "learning_rate": 9.692911042656168e-06, "loss": 17.1538, "step": 7593 }, { "epoch": 0.13881221781490485, "grad_norm": 6.739863673958903, "learning_rate": 9.692808893962913e-06, "loss": 17.9538, "step": 7594 }, { "epoch": 0.1388304970113514, "grad_norm": 7.724576329318004, "learning_rate": 9.69270672882177e-06, "loss": 18.2345, "step": 7595 }, { "epoch": 0.1388487762077979, "grad_norm": 8.162198807594477, "learning_rate": 9.6926045472331e-06, "loss": 18.0554, "step": 7596 }, { "epoch": 0.13886705540424443, "grad_norm": 6.825174864807807, "learning_rate": 9.692502349197255e-06, "loss": 17.8946, "step": 7597 }, { "epoch": 0.13888533460069094, "grad_norm": 6.685053782309902, "learning_rate": 9.692400134714597e-06, "loss": 17.8092, "step": 7598 }, { "epoch": 0.13890361379713748, "grad_norm": 7.50105563200056, "learning_rate": 9.692297903785485e-06, "loss": 17.4689, "step": 7599 }, { "epoch": 0.138921892993584, "grad_norm": 8.213849230114867, "learning_rate": 9.692195656410276e-06, "loss": 18.4874, "step": 7600 }, { "epoch": 0.13894017219003052, "grad_norm": 7.210729939117689, "learning_rate": 9.692093392589328e-06, "loss": 17.9628, "step": 7601 }, { "epoch": 0.13895845138647706, "grad_norm": 5.780872468116353, "learning_rate": 9.691991112323e-06, "loss": 17.5676, "step": 7602 }, { "epoch": 0.13897673058292356, "grad_norm": 7.7448713027600355, "learning_rate": 9.69188881561165e-06, "loss": 18.3937, "step": 7603 }, { "epoch": 0.1389950097793701, "grad_norm": 6.451954438687236, "learning_rate": 9.691786502455637e-06, "loss": 17.5783, "step": 7604 }, { "epoch": 0.13901328897581663, "grad_norm": 7.375328926641005, "learning_rate": 9.691684172855318e-06, "loss": 18.0649, "step": 7605 }, { "epoch": 0.13903156817226314, "grad_norm": 7.42328733850513, "learning_rate": 9.691581826811056e-06, "loss": 18.0613, "step": 7606 }, { "epoch": 0.13904984736870968, "grad_norm": 7.27599208052216, "learning_rate": 9.691479464323205e-06, "loss": 17.9711, "step": 7607 }, { "epoch": 0.13906812656515619, "grad_norm": 6.195330420273026, "learning_rate": 9.691377085392126e-06, "loss": 17.3167, "step": 7608 }, { "epoch": 0.13908640576160272, "grad_norm": 7.422560638770763, "learning_rate": 9.691274690018177e-06, "loss": 17.9469, "step": 7609 }, { "epoch": 0.13910468495804926, "grad_norm": 7.982168416130319, "learning_rate": 9.691172278201717e-06, "loss": 18.3047, "step": 7610 }, { "epoch": 0.13912296415449577, "grad_norm": 6.823450716965158, "learning_rate": 9.691069849943106e-06, "loss": 17.7405, "step": 7611 }, { "epoch": 0.1391412433509423, "grad_norm": 6.8991255450433995, "learning_rate": 9.690967405242702e-06, "loss": 17.9281, "step": 7612 }, { "epoch": 0.1391595225473888, "grad_norm": 7.549502658587642, "learning_rate": 9.690864944100864e-06, "loss": 18.1085, "step": 7613 }, { "epoch": 0.13917780174383534, "grad_norm": 7.010243806112929, "learning_rate": 9.690762466517953e-06, "loss": 17.9053, "step": 7614 }, { "epoch": 0.13919608094028185, "grad_norm": 6.2672935541821335, "learning_rate": 9.690659972494325e-06, "loss": 17.5679, "step": 7615 }, { "epoch": 0.1392143601367284, "grad_norm": 6.51920355904111, "learning_rate": 9.69055746203034e-06, "loss": 17.7973, "step": 7616 }, { "epoch": 0.13923263933317492, "grad_norm": 4.972536691426739, "learning_rate": 9.690454935126362e-06, "loss": 17.0055, "step": 7617 }, { "epoch": 0.13925091852962143, "grad_norm": 9.16414804210936, "learning_rate": 9.690352391782742e-06, "loss": 18.683, "step": 7618 }, { "epoch": 0.13926919772606797, "grad_norm": 6.353458986930466, "learning_rate": 9.690249831999845e-06, "loss": 17.6393, "step": 7619 }, { "epoch": 0.13928747692251447, "grad_norm": 7.549727257799057, "learning_rate": 9.69014725577803e-06, "loss": 17.8279, "step": 7620 }, { "epoch": 0.139305756118961, "grad_norm": 6.459371878675763, "learning_rate": 9.690044663117657e-06, "loss": 17.7354, "step": 7621 }, { "epoch": 0.13932403531540755, "grad_norm": 7.096093754094462, "learning_rate": 9.689942054019084e-06, "loss": 17.8056, "step": 7622 }, { "epoch": 0.13934231451185405, "grad_norm": 5.646822138702501, "learning_rate": 9.689839428482668e-06, "loss": 17.2833, "step": 7623 }, { "epoch": 0.1393605937083006, "grad_norm": 6.392818847550784, "learning_rate": 9.689736786508775e-06, "loss": 17.5295, "step": 7624 }, { "epoch": 0.1393788729047471, "grad_norm": 6.543122674993934, "learning_rate": 9.68963412809776e-06, "loss": 17.5072, "step": 7625 }, { "epoch": 0.13939715210119363, "grad_norm": 6.044145106145599, "learning_rate": 9.689531453249985e-06, "loss": 17.4496, "step": 7626 }, { "epoch": 0.13941543129764017, "grad_norm": 6.320195166276094, "learning_rate": 9.689428761965812e-06, "loss": 17.6063, "step": 7627 }, { "epoch": 0.13943371049408668, "grad_norm": 5.93275108209908, "learning_rate": 9.689326054245594e-06, "loss": 17.4617, "step": 7628 }, { "epoch": 0.1394519896905332, "grad_norm": 7.48701383614776, "learning_rate": 9.689223330089697e-06, "loss": 18.0072, "step": 7629 }, { "epoch": 0.13947026888697972, "grad_norm": 7.892054430291195, "learning_rate": 9.689120589498478e-06, "loss": 18.23, "step": 7630 }, { "epoch": 0.13948854808342626, "grad_norm": 8.388621093175491, "learning_rate": 9.689017832472298e-06, "loss": 18.298, "step": 7631 }, { "epoch": 0.13950682727987276, "grad_norm": 7.940115082999377, "learning_rate": 9.688915059011519e-06, "loss": 17.8922, "step": 7632 }, { "epoch": 0.1395251064763193, "grad_norm": 6.6214486318031405, "learning_rate": 9.688812269116498e-06, "loss": 17.3944, "step": 7633 }, { "epoch": 0.13954338567276584, "grad_norm": 8.291700876812332, "learning_rate": 9.688709462787598e-06, "loss": 18.3481, "step": 7634 }, { "epoch": 0.13956166486921234, "grad_norm": 8.239108156558169, "learning_rate": 9.688606640025178e-06, "loss": 18.5325, "step": 7635 }, { "epoch": 0.13957994406565888, "grad_norm": 7.025021516402914, "learning_rate": 9.6885038008296e-06, "loss": 17.9765, "step": 7636 }, { "epoch": 0.1395982232621054, "grad_norm": 6.098258722584013, "learning_rate": 9.68840094520122e-06, "loss": 17.4001, "step": 7637 }, { "epoch": 0.13961650245855192, "grad_norm": 7.6009707832906725, "learning_rate": 9.688298073140403e-06, "loss": 18.0495, "step": 7638 }, { "epoch": 0.13963478165499846, "grad_norm": 7.40103737490222, "learning_rate": 9.688195184647509e-06, "loss": 17.8624, "step": 7639 }, { "epoch": 0.13965306085144497, "grad_norm": 8.4314965735498, "learning_rate": 9.688092279722896e-06, "loss": 18.8786, "step": 7640 }, { "epoch": 0.1396713400478915, "grad_norm": 6.055062072160815, "learning_rate": 9.687989358366927e-06, "loss": 17.6618, "step": 7641 }, { "epoch": 0.139689619244338, "grad_norm": 6.05575625200334, "learning_rate": 9.687886420579962e-06, "loss": 17.3134, "step": 7642 }, { "epoch": 0.13970789844078454, "grad_norm": 8.622498771976538, "learning_rate": 9.687783466362362e-06, "loss": 18.3974, "step": 7643 }, { "epoch": 0.13972617763723108, "grad_norm": 5.865694263033465, "learning_rate": 9.687680495714488e-06, "loss": 17.2247, "step": 7644 }, { "epoch": 0.1397444568336776, "grad_norm": 6.811234057849274, "learning_rate": 9.6875775086367e-06, "loss": 17.7938, "step": 7645 }, { "epoch": 0.13976273603012412, "grad_norm": 7.106931351353856, "learning_rate": 9.687474505129362e-06, "loss": 18.1443, "step": 7646 }, { "epoch": 0.13978101522657063, "grad_norm": 6.482267760104404, "learning_rate": 9.687371485192831e-06, "loss": 17.4304, "step": 7647 }, { "epoch": 0.13979929442301717, "grad_norm": 7.974894210989417, "learning_rate": 9.687268448827468e-06, "loss": 18.3447, "step": 7648 }, { "epoch": 0.13981757361946368, "grad_norm": 7.5935022550416456, "learning_rate": 9.687165396033638e-06, "loss": 18.0296, "step": 7649 }, { "epoch": 0.1398358528159102, "grad_norm": 6.468510798971305, "learning_rate": 9.6870623268117e-06, "loss": 17.866, "step": 7650 }, { "epoch": 0.13985413201235675, "grad_norm": 7.5046531587992735, "learning_rate": 9.686959241162013e-06, "loss": 17.7633, "step": 7651 }, { "epoch": 0.13987241120880325, "grad_norm": 7.670355311867818, "learning_rate": 9.686856139084943e-06, "loss": 18.0413, "step": 7652 }, { "epoch": 0.1398906904052498, "grad_norm": 8.67865132995876, "learning_rate": 9.686753020580847e-06, "loss": 18.3526, "step": 7653 }, { "epoch": 0.1399089696016963, "grad_norm": 9.344161512670489, "learning_rate": 9.68664988565009e-06, "loss": 18.4226, "step": 7654 }, { "epoch": 0.13992724879814283, "grad_norm": 7.732963302029577, "learning_rate": 9.686546734293032e-06, "loss": 17.8059, "step": 7655 }, { "epoch": 0.13994552799458937, "grad_norm": 5.753291438789974, "learning_rate": 9.686443566510033e-06, "loss": 17.315, "step": 7656 }, { "epoch": 0.13996380719103588, "grad_norm": 6.825847797501684, "learning_rate": 9.686340382301457e-06, "loss": 17.8254, "step": 7657 }, { "epoch": 0.1399820863874824, "grad_norm": 9.393549575400462, "learning_rate": 9.686237181667664e-06, "loss": 18.7241, "step": 7658 }, { "epoch": 0.14000036558392892, "grad_norm": 5.97960450460996, "learning_rate": 9.686133964609017e-06, "loss": 17.4859, "step": 7659 }, { "epoch": 0.14001864478037546, "grad_norm": 8.19156414760745, "learning_rate": 9.686030731125877e-06, "loss": 18.161, "step": 7660 }, { "epoch": 0.140036923976822, "grad_norm": 7.13560642700512, "learning_rate": 9.685927481218605e-06, "loss": 17.7538, "step": 7661 }, { "epoch": 0.1400552031732685, "grad_norm": 7.955656266645502, "learning_rate": 9.685824214887565e-06, "loss": 18.4976, "step": 7662 }, { "epoch": 0.14007348236971504, "grad_norm": 10.783803224347071, "learning_rate": 9.685720932133117e-06, "loss": 18.1724, "step": 7663 }, { "epoch": 0.14009176156616154, "grad_norm": 7.623917149763039, "learning_rate": 9.685617632955625e-06, "loss": 18.119, "step": 7664 }, { "epoch": 0.14011004076260808, "grad_norm": 6.780303406814478, "learning_rate": 9.685514317355446e-06, "loss": 17.2607, "step": 7665 }, { "epoch": 0.1401283199590546, "grad_norm": 6.972500743781565, "learning_rate": 9.685410985332951e-06, "loss": 17.7824, "step": 7666 }, { "epoch": 0.14014659915550112, "grad_norm": 6.729420002075346, "learning_rate": 9.685307636888494e-06, "loss": 17.4817, "step": 7667 }, { "epoch": 0.14016487835194766, "grad_norm": 7.774515179824839, "learning_rate": 9.685204272022442e-06, "loss": 18.115, "step": 7668 }, { "epoch": 0.14018315754839417, "grad_norm": 6.472736167199539, "learning_rate": 9.685100890735153e-06, "loss": 17.8969, "step": 7669 }, { "epoch": 0.1402014367448407, "grad_norm": 7.681000135664825, "learning_rate": 9.684997493026994e-06, "loss": 18.0205, "step": 7670 }, { "epoch": 0.1402197159412872, "grad_norm": 8.360965006565488, "learning_rate": 9.684894078898325e-06, "loss": 18.1404, "step": 7671 }, { "epoch": 0.14023799513773375, "grad_norm": 6.861502415358219, "learning_rate": 9.68479064834951e-06, "loss": 17.7546, "step": 7672 }, { "epoch": 0.14025627433418028, "grad_norm": 7.025707723981484, "learning_rate": 9.684687201380908e-06, "loss": 17.9243, "step": 7673 }, { "epoch": 0.1402745535306268, "grad_norm": 6.833530124182017, "learning_rate": 9.684583737992884e-06, "loss": 17.7647, "step": 7674 }, { "epoch": 0.14029283272707332, "grad_norm": 6.082676889038833, "learning_rate": 9.684480258185802e-06, "loss": 17.3952, "step": 7675 }, { "epoch": 0.14031111192351983, "grad_norm": 8.40792409343925, "learning_rate": 9.684376761960022e-06, "loss": 18.2987, "step": 7676 }, { "epoch": 0.14032939111996637, "grad_norm": 7.225649769389998, "learning_rate": 9.684273249315909e-06, "loss": 18.0845, "step": 7677 }, { "epoch": 0.1403476703164129, "grad_norm": 6.613362749393355, "learning_rate": 9.684169720253824e-06, "loss": 17.6433, "step": 7678 }, { "epoch": 0.1403659495128594, "grad_norm": 6.17401684762471, "learning_rate": 9.68406617477413e-06, "loss": 17.6461, "step": 7679 }, { "epoch": 0.14038422870930595, "grad_norm": 5.907300268861695, "learning_rate": 9.683962612877191e-06, "loss": 17.4147, "step": 7680 }, { "epoch": 0.14040250790575245, "grad_norm": 6.438438388115875, "learning_rate": 9.68385903456337e-06, "loss": 17.7628, "step": 7681 }, { "epoch": 0.140420787102199, "grad_norm": 7.15609337283895, "learning_rate": 9.683755439833029e-06, "loss": 17.9813, "step": 7682 }, { "epoch": 0.1404390662986455, "grad_norm": 8.407826266878, "learning_rate": 9.683651828686533e-06, "loss": 18.6392, "step": 7683 }, { "epoch": 0.14045734549509203, "grad_norm": 7.438446230283374, "learning_rate": 9.683548201124242e-06, "loss": 18.1408, "step": 7684 }, { "epoch": 0.14047562469153857, "grad_norm": 5.922113971888481, "learning_rate": 9.683444557146522e-06, "loss": 17.4242, "step": 7685 }, { "epoch": 0.14049390388798508, "grad_norm": 7.767049220233212, "learning_rate": 9.683340896753736e-06, "loss": 18.243, "step": 7686 }, { "epoch": 0.1405121830844316, "grad_norm": 7.7351425289605285, "learning_rate": 9.683237219946244e-06, "loss": 17.8322, "step": 7687 }, { "epoch": 0.14053046228087812, "grad_norm": 5.766262580266691, "learning_rate": 9.683133526724413e-06, "loss": 17.4055, "step": 7688 }, { "epoch": 0.14054874147732466, "grad_norm": 5.425520660615894, "learning_rate": 9.683029817088608e-06, "loss": 17.2274, "step": 7689 }, { "epoch": 0.1405670206737712, "grad_norm": 7.575820684465325, "learning_rate": 9.682926091039187e-06, "loss": 18.0752, "step": 7690 }, { "epoch": 0.1405852998702177, "grad_norm": 8.092227554164923, "learning_rate": 9.682822348576518e-06, "loss": 17.9636, "step": 7691 }, { "epoch": 0.14060357906666424, "grad_norm": 7.253130689899035, "learning_rate": 9.68271858970096e-06, "loss": 17.9492, "step": 7692 }, { "epoch": 0.14062185826311074, "grad_norm": 8.145629293534444, "learning_rate": 9.682614814412883e-06, "loss": 18.4608, "step": 7693 }, { "epoch": 0.14064013745955728, "grad_norm": 7.160254710914674, "learning_rate": 9.682511022712646e-06, "loss": 17.9893, "step": 7694 }, { "epoch": 0.14065841665600382, "grad_norm": 7.357299262847844, "learning_rate": 9.682407214600615e-06, "loss": 18.0813, "step": 7695 }, { "epoch": 0.14067669585245032, "grad_norm": 8.08579147665916, "learning_rate": 9.682303390077153e-06, "loss": 17.9742, "step": 7696 }, { "epoch": 0.14069497504889686, "grad_norm": 6.973170726147441, "learning_rate": 9.682199549142623e-06, "loss": 17.6363, "step": 7697 }, { "epoch": 0.14071325424534337, "grad_norm": 6.969016413291826, "learning_rate": 9.682095691797391e-06, "loss": 18.1067, "step": 7698 }, { "epoch": 0.1407315334417899, "grad_norm": 7.506545156191458, "learning_rate": 9.681991818041818e-06, "loss": 18.1744, "step": 7699 }, { "epoch": 0.1407498126382364, "grad_norm": 7.9121187519485705, "learning_rate": 9.681887927876271e-06, "loss": 17.9692, "step": 7700 }, { "epoch": 0.14076809183468295, "grad_norm": 8.679743774834556, "learning_rate": 9.681784021301112e-06, "loss": 17.9166, "step": 7701 }, { "epoch": 0.14078637103112948, "grad_norm": 7.024434714046227, "learning_rate": 9.68168009831671e-06, "loss": 17.8942, "step": 7702 }, { "epoch": 0.140804650227576, "grad_norm": 7.349989985891914, "learning_rate": 9.681576158923423e-06, "loss": 18.1657, "step": 7703 }, { "epoch": 0.14082292942402252, "grad_norm": 8.459378945845003, "learning_rate": 9.681472203121617e-06, "loss": 18.7275, "step": 7704 }, { "epoch": 0.14084120862046903, "grad_norm": 9.695222479773127, "learning_rate": 9.681368230911659e-06, "loss": 18.4332, "step": 7705 }, { "epoch": 0.14085948781691557, "grad_norm": 7.186765750034922, "learning_rate": 9.68126424229391e-06, "loss": 18.0776, "step": 7706 }, { "epoch": 0.1408777670133621, "grad_norm": 6.854102062564552, "learning_rate": 9.681160237268737e-06, "loss": 17.9172, "step": 7707 }, { "epoch": 0.1408960462098086, "grad_norm": 7.3632229794090005, "learning_rate": 9.681056215836501e-06, "loss": 18.0963, "step": 7708 }, { "epoch": 0.14091432540625515, "grad_norm": 7.7221081410587455, "learning_rate": 9.680952177997572e-06, "loss": 18.1901, "step": 7709 }, { "epoch": 0.14093260460270166, "grad_norm": 5.924403423489709, "learning_rate": 9.680848123752312e-06, "loss": 17.3988, "step": 7710 }, { "epoch": 0.1409508837991482, "grad_norm": 6.993460661737724, "learning_rate": 9.680744053101084e-06, "loss": 17.8039, "step": 7711 }, { "epoch": 0.14096916299559473, "grad_norm": 8.421952771436054, "learning_rate": 9.680639966044256e-06, "loss": 18.4236, "step": 7712 }, { "epoch": 0.14098744219204123, "grad_norm": 6.789647333264264, "learning_rate": 9.68053586258219e-06, "loss": 17.6687, "step": 7713 }, { "epoch": 0.14100572138848777, "grad_norm": 7.722165563852823, "learning_rate": 9.680431742715252e-06, "loss": 17.7669, "step": 7714 }, { "epoch": 0.14102400058493428, "grad_norm": 7.719011591664162, "learning_rate": 9.680327606443806e-06, "loss": 18.1179, "step": 7715 }, { "epoch": 0.1410422797813808, "grad_norm": 7.767842326935953, "learning_rate": 9.680223453768219e-06, "loss": 18.1272, "step": 7716 }, { "epoch": 0.14106055897782732, "grad_norm": 7.5680030448530955, "learning_rate": 9.680119284688855e-06, "loss": 17.723, "step": 7717 }, { "epoch": 0.14107883817427386, "grad_norm": 7.7507445436356015, "learning_rate": 9.68001509920608e-06, "loss": 18.0347, "step": 7718 }, { "epoch": 0.1410971173707204, "grad_norm": 7.307328906945655, "learning_rate": 9.679910897320254e-06, "loss": 17.8308, "step": 7719 }, { "epoch": 0.1411153965671669, "grad_norm": 7.951724740756475, "learning_rate": 9.679806679031751e-06, "loss": 18.0057, "step": 7720 }, { "epoch": 0.14113367576361344, "grad_norm": 6.778070343960303, "learning_rate": 9.67970244434093e-06, "loss": 17.4964, "step": 7721 }, { "epoch": 0.14115195496005994, "grad_norm": 8.07497389710269, "learning_rate": 9.679598193248159e-06, "loss": 18.3359, "step": 7722 }, { "epoch": 0.14117023415650648, "grad_norm": 6.6248376575767605, "learning_rate": 9.6794939257538e-06, "loss": 17.7897, "step": 7723 }, { "epoch": 0.14118851335295302, "grad_norm": 5.905110351666292, "learning_rate": 9.679389641858224e-06, "loss": 17.2798, "step": 7724 }, { "epoch": 0.14120679254939952, "grad_norm": 7.142681101724337, "learning_rate": 9.67928534156179e-06, "loss": 17.8447, "step": 7725 }, { "epoch": 0.14122507174584606, "grad_norm": 6.413274281557343, "learning_rate": 9.679181024864869e-06, "loss": 17.6408, "step": 7726 }, { "epoch": 0.14124335094229257, "grad_norm": 8.088982050404411, "learning_rate": 9.679076691767823e-06, "loss": 18.3308, "step": 7727 }, { "epoch": 0.1412616301387391, "grad_norm": 7.639545561389265, "learning_rate": 9.678972342271023e-06, "loss": 17.6528, "step": 7728 }, { "epoch": 0.14127990933518564, "grad_norm": 8.758924487537499, "learning_rate": 9.678867976374827e-06, "loss": 18.6422, "step": 7729 }, { "epoch": 0.14129818853163215, "grad_norm": 7.8443891245671065, "learning_rate": 9.678763594079605e-06, "loss": 18.2581, "step": 7730 }, { "epoch": 0.14131646772807868, "grad_norm": 8.158889575581766, "learning_rate": 9.678659195385724e-06, "loss": 17.9907, "step": 7731 }, { "epoch": 0.1413347469245252, "grad_norm": 7.128034971801864, "learning_rate": 9.67855478029355e-06, "loss": 18.0491, "step": 7732 }, { "epoch": 0.14135302612097173, "grad_norm": 7.524698700334921, "learning_rate": 9.678450348803445e-06, "loss": 18.0207, "step": 7733 }, { "epoch": 0.14137130531741823, "grad_norm": 7.413578969658936, "learning_rate": 9.678345900915778e-06, "loss": 17.9179, "step": 7734 }, { "epoch": 0.14138958451386477, "grad_norm": 6.944010819432446, "learning_rate": 9.678241436630916e-06, "loss": 17.7795, "step": 7735 }, { "epoch": 0.1414078637103113, "grad_norm": 9.25296851022195, "learning_rate": 9.67813695594922e-06, "loss": 18.8079, "step": 7736 }, { "epoch": 0.1414261429067578, "grad_norm": 7.284723548912895, "learning_rate": 9.678032458871063e-06, "loss": 18.1341, "step": 7737 }, { "epoch": 0.14144442210320435, "grad_norm": 8.14959581041399, "learning_rate": 9.677927945396808e-06, "loss": 18.3563, "step": 7738 }, { "epoch": 0.14146270129965086, "grad_norm": 7.108870755550117, "learning_rate": 9.677823415526822e-06, "loss": 17.8498, "step": 7739 }, { "epoch": 0.1414809804960974, "grad_norm": 6.407079401501023, "learning_rate": 9.67771886926147e-06, "loss": 17.5913, "step": 7740 }, { "epoch": 0.14149925969254393, "grad_norm": 7.224881529647223, "learning_rate": 9.67761430660112e-06, "loss": 17.9471, "step": 7741 }, { "epoch": 0.14151753888899044, "grad_norm": 6.837148647998776, "learning_rate": 9.677509727546134e-06, "loss": 17.4927, "step": 7742 }, { "epoch": 0.14153581808543697, "grad_norm": 7.846606169916832, "learning_rate": 9.677405132096887e-06, "loss": 18.4396, "step": 7743 }, { "epoch": 0.14155409728188348, "grad_norm": 7.524877752546834, "learning_rate": 9.677300520253738e-06, "loss": 18.0791, "step": 7744 }, { "epoch": 0.14157237647833001, "grad_norm": 7.487832029774735, "learning_rate": 9.677195892017059e-06, "loss": 17.7253, "step": 7745 }, { "epoch": 0.14159065567477655, "grad_norm": 6.419594968610413, "learning_rate": 9.677091247387214e-06, "loss": 17.5961, "step": 7746 }, { "epoch": 0.14160893487122306, "grad_norm": 7.8628128876797945, "learning_rate": 9.676986586364567e-06, "loss": 18.4315, "step": 7747 }, { "epoch": 0.1416272140676696, "grad_norm": 7.299063522511162, "learning_rate": 9.676881908949492e-06, "loss": 17.8903, "step": 7748 }, { "epoch": 0.1416454932641161, "grad_norm": 5.806594807064861, "learning_rate": 9.676777215142348e-06, "loss": 17.2699, "step": 7749 }, { "epoch": 0.14166377246056264, "grad_norm": 5.770711123757083, "learning_rate": 9.676672504943508e-06, "loss": 17.4297, "step": 7750 }, { "epoch": 0.14168205165700914, "grad_norm": 7.502838539343097, "learning_rate": 9.676567778353337e-06, "loss": 17.9715, "step": 7751 }, { "epoch": 0.14170033085345568, "grad_norm": 6.662998191797699, "learning_rate": 9.6764630353722e-06, "loss": 17.5581, "step": 7752 }, { "epoch": 0.14171861004990222, "grad_norm": 7.416255808314275, "learning_rate": 9.676358276000466e-06, "loss": 17.9441, "step": 7753 }, { "epoch": 0.14173688924634872, "grad_norm": 7.192597832118229, "learning_rate": 9.676253500238503e-06, "loss": 17.8188, "step": 7754 }, { "epoch": 0.14175516844279526, "grad_norm": 6.303949808613113, "learning_rate": 9.676148708086677e-06, "loss": 17.5782, "step": 7755 }, { "epoch": 0.14177344763924177, "grad_norm": 7.537919742112997, "learning_rate": 9.676043899545356e-06, "loss": 18.1434, "step": 7756 }, { "epoch": 0.1417917268356883, "grad_norm": 6.324639285936871, "learning_rate": 9.675939074614907e-06, "loss": 17.4917, "step": 7757 }, { "epoch": 0.14181000603213484, "grad_norm": 6.705794880450858, "learning_rate": 9.675834233295696e-06, "loss": 17.5962, "step": 7758 }, { "epoch": 0.14182828522858135, "grad_norm": 6.831759017623418, "learning_rate": 9.675729375588092e-06, "loss": 17.6193, "step": 7759 }, { "epoch": 0.14184656442502788, "grad_norm": 6.973219084686587, "learning_rate": 9.675624501492462e-06, "loss": 17.7955, "step": 7760 }, { "epoch": 0.1418648436214744, "grad_norm": 6.677520348869061, "learning_rate": 9.675519611009176e-06, "loss": 17.5277, "step": 7761 }, { "epoch": 0.14188312281792093, "grad_norm": 6.567747401858718, "learning_rate": 9.675414704138596e-06, "loss": 17.499, "step": 7762 }, { "epoch": 0.14190140201436746, "grad_norm": 6.144341090505756, "learning_rate": 9.675309780881097e-06, "loss": 17.484, "step": 7763 }, { "epoch": 0.14191968121081397, "grad_norm": 6.445653628061477, "learning_rate": 9.67520484123704e-06, "loss": 17.36, "step": 7764 }, { "epoch": 0.1419379604072605, "grad_norm": 8.560003692490604, "learning_rate": 9.675099885206798e-06, "loss": 18.2682, "step": 7765 }, { "epoch": 0.141956239603707, "grad_norm": 7.859397461991227, "learning_rate": 9.674994912790736e-06, "loss": 18.1824, "step": 7766 }, { "epoch": 0.14197451880015355, "grad_norm": 7.106172855319866, "learning_rate": 9.674889923989222e-06, "loss": 17.9058, "step": 7767 }, { "epoch": 0.14199279799660006, "grad_norm": 7.00829803230192, "learning_rate": 9.674784918802624e-06, "loss": 17.3542, "step": 7768 }, { "epoch": 0.1420110771930466, "grad_norm": 7.173244707430433, "learning_rate": 9.674679897231311e-06, "loss": 17.8225, "step": 7769 }, { "epoch": 0.14202935638949313, "grad_norm": 7.232542256664458, "learning_rate": 9.67457485927565e-06, "loss": 17.7652, "step": 7770 }, { "epoch": 0.14204763558593964, "grad_norm": 7.01716133149273, "learning_rate": 9.674469804936012e-06, "loss": 17.7022, "step": 7771 }, { "epoch": 0.14206591478238617, "grad_norm": 5.810302252713791, "learning_rate": 9.67436473421276e-06, "loss": 17.1869, "step": 7772 }, { "epoch": 0.14208419397883268, "grad_norm": 7.134096152854323, "learning_rate": 9.674259647106268e-06, "loss": 17.8671, "step": 7773 }, { "epoch": 0.14210247317527921, "grad_norm": 6.786937004516573, "learning_rate": 9.6741545436169e-06, "loss": 17.7471, "step": 7774 }, { "epoch": 0.14212075237172575, "grad_norm": 6.500229037330626, "learning_rate": 9.674049423745025e-06, "loss": 17.8578, "step": 7775 }, { "epoch": 0.14213903156817226, "grad_norm": 6.220294935342864, "learning_rate": 9.673944287491013e-06, "loss": 17.4893, "step": 7776 }, { "epoch": 0.1421573107646188, "grad_norm": 6.9788471623255175, "learning_rate": 9.673839134855233e-06, "loss": 17.8216, "step": 7777 }, { "epoch": 0.1421755899610653, "grad_norm": 6.73552468574272, "learning_rate": 9.673733965838053e-06, "loss": 17.7899, "step": 7778 }, { "epoch": 0.14219386915751184, "grad_norm": 7.2627811451102335, "learning_rate": 9.673628780439839e-06, "loss": 17.8894, "step": 7779 }, { "epoch": 0.14221214835395837, "grad_norm": 7.622253576899311, "learning_rate": 9.673523578660962e-06, "loss": 18.0757, "step": 7780 }, { "epoch": 0.14223042755040488, "grad_norm": 6.749261020223755, "learning_rate": 9.67341836050179e-06, "loss": 17.6294, "step": 7781 }, { "epoch": 0.14224870674685142, "grad_norm": 5.852666143775732, "learning_rate": 9.673313125962693e-06, "loss": 17.6006, "step": 7782 }, { "epoch": 0.14226698594329792, "grad_norm": 5.88140172454095, "learning_rate": 9.673207875044039e-06, "loss": 17.4948, "step": 7783 }, { "epoch": 0.14228526513974446, "grad_norm": 7.210294985040148, "learning_rate": 9.673102607746198e-06, "loss": 17.5802, "step": 7784 }, { "epoch": 0.14230354433619097, "grad_norm": 6.494521279404934, "learning_rate": 9.672997324069536e-06, "loss": 17.5406, "step": 7785 }, { "epoch": 0.1423218235326375, "grad_norm": 8.046854484935118, "learning_rate": 9.672892024014426e-06, "loss": 18.7805, "step": 7786 }, { "epoch": 0.14234010272908404, "grad_norm": 7.789728345892877, "learning_rate": 9.672786707581232e-06, "loss": 18.1358, "step": 7787 }, { "epoch": 0.14235838192553055, "grad_norm": 7.264851122492589, "learning_rate": 9.672681374770328e-06, "loss": 17.9878, "step": 7788 }, { "epoch": 0.14237666112197708, "grad_norm": 6.076518274182032, "learning_rate": 9.672576025582081e-06, "loss": 17.3517, "step": 7789 }, { "epoch": 0.1423949403184236, "grad_norm": 6.342632611380603, "learning_rate": 9.672470660016862e-06, "loss": 17.4663, "step": 7790 }, { "epoch": 0.14241321951487013, "grad_norm": 6.879084098925314, "learning_rate": 9.672365278075035e-06, "loss": 17.7861, "step": 7791 }, { "epoch": 0.14243149871131666, "grad_norm": 7.5159207183567025, "learning_rate": 9.672259879756976e-06, "loss": 17.668, "step": 7792 }, { "epoch": 0.14244977790776317, "grad_norm": 8.093294225857699, "learning_rate": 9.672154465063051e-06, "loss": 18.1166, "step": 7793 }, { "epoch": 0.1424680571042097, "grad_norm": 6.536931922435906, "learning_rate": 9.672049033993632e-06, "loss": 17.5174, "step": 7794 }, { "epoch": 0.1424863363006562, "grad_norm": 5.961237974227077, "learning_rate": 9.671943586549085e-06, "loss": 17.2613, "step": 7795 }, { "epoch": 0.14250461549710275, "grad_norm": 7.349228206352338, "learning_rate": 9.67183812272978e-06, "loss": 17.9133, "step": 7796 }, { "epoch": 0.14252289469354928, "grad_norm": 7.074451173895017, "learning_rate": 9.671732642536087e-06, "loss": 17.6657, "step": 7797 }, { "epoch": 0.1425411738899958, "grad_norm": 6.834222829579723, "learning_rate": 9.67162714596838e-06, "loss": 17.8446, "step": 7798 }, { "epoch": 0.14255945308644233, "grad_norm": 6.867635214185919, "learning_rate": 9.671521633027022e-06, "loss": 17.5231, "step": 7799 }, { "epoch": 0.14257773228288884, "grad_norm": 7.517409601017157, "learning_rate": 9.671416103712389e-06, "loss": 18.0342, "step": 7800 }, { "epoch": 0.14259601147933537, "grad_norm": 7.247493154377069, "learning_rate": 9.671310558024844e-06, "loss": 17.6106, "step": 7801 }, { "epoch": 0.14261429067578188, "grad_norm": 8.626717076846722, "learning_rate": 9.671204995964762e-06, "loss": 18.3426, "step": 7802 }, { "epoch": 0.14263256987222842, "grad_norm": 6.661527484937428, "learning_rate": 9.671099417532515e-06, "loss": 17.5053, "step": 7803 }, { "epoch": 0.14265084906867495, "grad_norm": 8.057202515842757, "learning_rate": 9.670993822728467e-06, "loss": 17.7858, "step": 7804 }, { "epoch": 0.14266912826512146, "grad_norm": 7.05030211660043, "learning_rate": 9.670888211552992e-06, "loss": 17.6583, "step": 7805 }, { "epoch": 0.142687407461568, "grad_norm": 5.706726345107122, "learning_rate": 9.670782584006459e-06, "loss": 17.141, "step": 7806 }, { "epoch": 0.1427056866580145, "grad_norm": 7.344611184842882, "learning_rate": 9.670676940089239e-06, "loss": 17.8914, "step": 7807 }, { "epoch": 0.14272396585446104, "grad_norm": 7.022220843795626, "learning_rate": 9.670571279801699e-06, "loss": 17.6126, "step": 7808 }, { "epoch": 0.14274224505090757, "grad_norm": 7.564178950445632, "learning_rate": 9.670465603144214e-06, "loss": 18.0142, "step": 7809 }, { "epoch": 0.14276052424735408, "grad_norm": 5.9335851077868735, "learning_rate": 9.670359910117153e-06, "loss": 17.1887, "step": 7810 }, { "epoch": 0.14277880344380062, "grad_norm": 6.846377150268151, "learning_rate": 9.670254200720886e-06, "loss": 17.8817, "step": 7811 }, { "epoch": 0.14279708264024712, "grad_norm": 6.747451226197997, "learning_rate": 9.67014847495578e-06, "loss": 17.6365, "step": 7812 }, { "epoch": 0.14281536183669366, "grad_norm": 8.90710576128569, "learning_rate": 9.670042732822212e-06, "loss": 18.3125, "step": 7813 }, { "epoch": 0.1428336410331402, "grad_norm": 5.612174147043535, "learning_rate": 9.669936974320548e-06, "loss": 17.2074, "step": 7814 }, { "epoch": 0.1428519202295867, "grad_norm": 6.7189199468273495, "learning_rate": 9.669831199451161e-06, "loss": 17.4914, "step": 7815 }, { "epoch": 0.14287019942603324, "grad_norm": 6.57693825095924, "learning_rate": 9.66972540821442e-06, "loss": 17.2984, "step": 7816 }, { "epoch": 0.14288847862247975, "grad_norm": 7.099477125925888, "learning_rate": 9.669619600610699e-06, "loss": 17.7003, "step": 7817 }, { "epoch": 0.14290675781892628, "grad_norm": 5.980220718531627, "learning_rate": 9.669513776640364e-06, "loss": 17.449, "step": 7818 }, { "epoch": 0.1429250370153728, "grad_norm": 6.659901544676826, "learning_rate": 9.66940793630379e-06, "loss": 17.4634, "step": 7819 }, { "epoch": 0.14294331621181933, "grad_norm": 7.064695813405876, "learning_rate": 9.669302079601345e-06, "loss": 17.9158, "step": 7820 }, { "epoch": 0.14296159540826586, "grad_norm": 9.535819291754025, "learning_rate": 9.669196206533402e-06, "loss": 18.5917, "step": 7821 }, { "epoch": 0.14297987460471237, "grad_norm": 6.882356198216137, "learning_rate": 9.669090317100331e-06, "loss": 17.802, "step": 7822 }, { "epoch": 0.1429981538011589, "grad_norm": 8.259487175303988, "learning_rate": 9.668984411302504e-06, "loss": 18.0371, "step": 7823 }, { "epoch": 0.1430164329976054, "grad_norm": 7.107527463967547, "learning_rate": 9.668878489140292e-06, "loss": 17.5423, "step": 7824 }, { "epoch": 0.14303471219405195, "grad_norm": 6.455719803755345, "learning_rate": 9.668772550614067e-06, "loss": 17.4033, "step": 7825 }, { "epoch": 0.14305299139049849, "grad_norm": 6.653754036169663, "learning_rate": 9.668666595724196e-06, "loss": 17.7759, "step": 7826 }, { "epoch": 0.143071270586945, "grad_norm": 7.153991568865042, "learning_rate": 9.668560624471057e-06, "loss": 17.9118, "step": 7827 }, { "epoch": 0.14308954978339153, "grad_norm": 6.569018776209766, "learning_rate": 9.668454636855018e-06, "loss": 17.8073, "step": 7828 }, { "epoch": 0.14310782897983804, "grad_norm": 6.221145113425062, "learning_rate": 9.668348632876448e-06, "loss": 17.3776, "step": 7829 }, { "epoch": 0.14312610817628457, "grad_norm": 6.1130743532573595, "learning_rate": 9.668242612535723e-06, "loss": 17.3465, "step": 7830 }, { "epoch": 0.1431443873727311, "grad_norm": 8.126195268243293, "learning_rate": 9.668136575833213e-06, "loss": 18.4532, "step": 7831 }, { "epoch": 0.14316266656917762, "grad_norm": 6.292462249411014, "learning_rate": 9.668030522769289e-06, "loss": 17.4285, "step": 7832 }, { "epoch": 0.14318094576562415, "grad_norm": 6.387665515722563, "learning_rate": 9.667924453344324e-06, "loss": 17.534, "step": 7833 }, { "epoch": 0.14319922496207066, "grad_norm": 5.734749013084801, "learning_rate": 9.667818367558687e-06, "loss": 17.2402, "step": 7834 }, { "epoch": 0.1432175041585172, "grad_norm": 6.849385585121317, "learning_rate": 9.667712265412751e-06, "loss": 18.0146, "step": 7835 }, { "epoch": 0.1432357833549637, "grad_norm": 7.558442824431708, "learning_rate": 9.667606146906892e-06, "loss": 17.546, "step": 7836 }, { "epoch": 0.14325406255141024, "grad_norm": 7.460597625231734, "learning_rate": 9.667500012041476e-06, "loss": 17.6715, "step": 7837 }, { "epoch": 0.14327234174785677, "grad_norm": 7.270287286908869, "learning_rate": 9.667393860816878e-06, "loss": 17.7303, "step": 7838 }, { "epoch": 0.14329062094430328, "grad_norm": 6.437807876874178, "learning_rate": 9.667287693233471e-06, "loss": 17.6004, "step": 7839 }, { "epoch": 0.14330890014074982, "grad_norm": 7.281635820821214, "learning_rate": 9.667181509291623e-06, "loss": 17.8611, "step": 7840 }, { "epoch": 0.14332717933719633, "grad_norm": 9.048553612734262, "learning_rate": 9.66707530899171e-06, "loss": 18.4027, "step": 7841 }, { "epoch": 0.14334545853364286, "grad_norm": 8.407622272493642, "learning_rate": 9.666969092334104e-06, "loss": 18.317, "step": 7842 }, { "epoch": 0.1433637377300894, "grad_norm": 6.741525791108761, "learning_rate": 9.666862859319175e-06, "loss": 17.5255, "step": 7843 }, { "epoch": 0.1433820169265359, "grad_norm": 7.334684755636911, "learning_rate": 9.666756609947297e-06, "loss": 17.7949, "step": 7844 }, { "epoch": 0.14340029612298244, "grad_norm": 6.222155132693972, "learning_rate": 9.666650344218842e-06, "loss": 17.4305, "step": 7845 }, { "epoch": 0.14341857531942895, "grad_norm": 7.249972428703729, "learning_rate": 9.666544062134182e-06, "loss": 17.6051, "step": 7846 }, { "epoch": 0.14343685451587548, "grad_norm": 6.377084097167007, "learning_rate": 9.666437763693691e-06, "loss": 17.5562, "step": 7847 }, { "epoch": 0.14345513371232202, "grad_norm": 7.704324039458236, "learning_rate": 9.66633144889774e-06, "loss": 18.0369, "step": 7848 }, { "epoch": 0.14347341290876853, "grad_norm": 8.830517083057435, "learning_rate": 9.666225117746703e-06, "loss": 18.7811, "step": 7849 }, { "epoch": 0.14349169210521506, "grad_norm": 7.704071862496725, "learning_rate": 9.66611877024095e-06, "loss": 17.5736, "step": 7850 }, { "epoch": 0.14350997130166157, "grad_norm": 6.587411827645895, "learning_rate": 9.666012406380858e-06, "loss": 17.5842, "step": 7851 }, { "epoch": 0.1435282504981081, "grad_norm": 10.814748016890894, "learning_rate": 9.665906026166796e-06, "loss": 19.9485, "step": 7852 }, { "epoch": 0.14354652969455461, "grad_norm": 6.428524945881435, "learning_rate": 9.66579962959914e-06, "loss": 17.6798, "step": 7853 }, { "epoch": 0.14356480889100115, "grad_norm": 7.238464464725674, "learning_rate": 9.665693216678259e-06, "loss": 17.9204, "step": 7854 }, { "epoch": 0.14358308808744769, "grad_norm": 7.525942968671081, "learning_rate": 9.665586787404528e-06, "loss": 18.2486, "step": 7855 }, { "epoch": 0.1436013672838942, "grad_norm": 6.829776281861548, "learning_rate": 9.665480341778322e-06, "loss": 17.51, "step": 7856 }, { "epoch": 0.14361964648034073, "grad_norm": 7.571890437381861, "learning_rate": 9.66537387980001e-06, "loss": 18.1075, "step": 7857 }, { "epoch": 0.14363792567678724, "grad_norm": 7.484927683844003, "learning_rate": 9.66526740146997e-06, "loss": 17.8707, "step": 7858 }, { "epoch": 0.14365620487323377, "grad_norm": 8.00425417309483, "learning_rate": 9.665160906788571e-06, "loss": 18.1998, "step": 7859 }, { "epoch": 0.1436744840696803, "grad_norm": 7.277403688336308, "learning_rate": 9.665054395756188e-06, "loss": 17.9342, "step": 7860 }, { "epoch": 0.14369276326612682, "grad_norm": 7.344833164501549, "learning_rate": 9.664947868373195e-06, "loss": 17.8342, "step": 7861 }, { "epoch": 0.14371104246257335, "grad_norm": 7.627254165312603, "learning_rate": 9.664841324639963e-06, "loss": 18.0097, "step": 7862 }, { "epoch": 0.14372932165901986, "grad_norm": 6.655124794610509, "learning_rate": 9.664734764556869e-06, "loss": 17.5146, "step": 7863 }, { "epoch": 0.1437476008554664, "grad_norm": 7.226636848068177, "learning_rate": 9.664628188124282e-06, "loss": 17.9697, "step": 7864 }, { "epoch": 0.14376588005191293, "grad_norm": 7.629781323015025, "learning_rate": 9.66452159534258e-06, "loss": 18.1848, "step": 7865 }, { "epoch": 0.14378415924835944, "grad_norm": 6.377562023438023, "learning_rate": 9.664414986212134e-06, "loss": 17.3943, "step": 7866 }, { "epoch": 0.14380243844480597, "grad_norm": 7.888258891692893, "learning_rate": 9.664308360733316e-06, "loss": 18.0158, "step": 7867 }, { "epoch": 0.14382071764125248, "grad_norm": 6.701332911179065, "learning_rate": 9.664201718906506e-06, "loss": 17.4241, "step": 7868 }, { "epoch": 0.14383899683769902, "grad_norm": 6.929538947451322, "learning_rate": 9.66409506073207e-06, "loss": 17.7239, "step": 7869 }, { "epoch": 0.14385727603414553, "grad_norm": 7.044341210749867, "learning_rate": 9.663988386210388e-06, "loss": 17.8299, "step": 7870 }, { "epoch": 0.14387555523059206, "grad_norm": 6.927395988763612, "learning_rate": 9.66388169534183e-06, "loss": 17.7399, "step": 7871 }, { "epoch": 0.1438938344270386, "grad_norm": 6.678235413763966, "learning_rate": 9.663774988126772e-06, "loss": 17.2341, "step": 7872 }, { "epoch": 0.1439121136234851, "grad_norm": 6.617014339663367, "learning_rate": 9.663668264565589e-06, "loss": 17.5994, "step": 7873 }, { "epoch": 0.14393039281993164, "grad_norm": 6.135451945627378, "learning_rate": 9.663561524658652e-06, "loss": 17.3528, "step": 7874 }, { "epoch": 0.14394867201637815, "grad_norm": 10.054335633136397, "learning_rate": 9.663454768406335e-06, "loss": 17.8716, "step": 7875 }, { "epoch": 0.14396695121282468, "grad_norm": 7.821853537649704, "learning_rate": 9.663347995809016e-06, "loss": 17.5585, "step": 7876 }, { "epoch": 0.14398523040927122, "grad_norm": 6.6592194858021765, "learning_rate": 9.663241206867065e-06, "loss": 17.5602, "step": 7877 }, { "epoch": 0.14400350960571773, "grad_norm": 7.406957218568989, "learning_rate": 9.66313440158086e-06, "loss": 17.6681, "step": 7878 }, { "epoch": 0.14402178880216426, "grad_norm": 7.183931433865366, "learning_rate": 9.663027579950771e-06, "loss": 17.731, "step": 7879 }, { "epoch": 0.14404006799861077, "grad_norm": 6.795145994337344, "learning_rate": 9.662920741977177e-06, "loss": 17.5502, "step": 7880 }, { "epoch": 0.1440583471950573, "grad_norm": 7.425098615702723, "learning_rate": 9.662813887660451e-06, "loss": 17.3886, "step": 7881 }, { "epoch": 0.14407662639150384, "grad_norm": 7.467302204427384, "learning_rate": 9.662707017000967e-06, "loss": 18.0887, "step": 7882 }, { "epoch": 0.14409490558795035, "grad_norm": 8.129398788716378, "learning_rate": 9.662600129999098e-06, "loss": 18.5446, "step": 7883 }, { "epoch": 0.14411318478439689, "grad_norm": 6.196146066413878, "learning_rate": 9.66249322665522e-06, "loss": 17.2915, "step": 7884 }, { "epoch": 0.1441314639808434, "grad_norm": 6.0383151707049025, "learning_rate": 9.662386306969708e-06, "loss": 17.2602, "step": 7885 }, { "epoch": 0.14414974317728993, "grad_norm": 6.422942706740273, "learning_rate": 9.66227937094294e-06, "loss": 17.3709, "step": 7886 }, { "epoch": 0.14416802237373644, "grad_norm": 7.858451025377491, "learning_rate": 9.662172418575284e-06, "loss": 18.1276, "step": 7887 }, { "epoch": 0.14418630157018297, "grad_norm": 7.249390263111178, "learning_rate": 9.662065449867117e-06, "loss": 17.5421, "step": 7888 }, { "epoch": 0.1442045807666295, "grad_norm": 7.125012968987952, "learning_rate": 9.661958464818818e-06, "loss": 17.6351, "step": 7889 }, { "epoch": 0.14422285996307602, "grad_norm": 6.6766843944047745, "learning_rate": 9.661851463430757e-06, "loss": 17.5045, "step": 7890 }, { "epoch": 0.14424113915952255, "grad_norm": 8.734256701885561, "learning_rate": 9.661744445703314e-06, "loss": 18.4634, "step": 7891 }, { "epoch": 0.14425941835596906, "grad_norm": 6.883130009873722, "learning_rate": 9.661637411636859e-06, "loss": 17.9115, "step": 7892 }, { "epoch": 0.1442776975524156, "grad_norm": 7.642350001935957, "learning_rate": 9.66153036123177e-06, "loss": 18.0374, "step": 7893 }, { "epoch": 0.14429597674886213, "grad_norm": 7.02852331794176, "learning_rate": 9.66142329448842e-06, "loss": 17.8951, "step": 7894 }, { "epoch": 0.14431425594530864, "grad_norm": 8.516363209070965, "learning_rate": 9.66131621140719e-06, "loss": 18.2965, "step": 7895 }, { "epoch": 0.14433253514175517, "grad_norm": 6.603701088154492, "learning_rate": 9.661209111988448e-06, "loss": 17.6132, "step": 7896 }, { "epoch": 0.14435081433820168, "grad_norm": 6.540115720505726, "learning_rate": 9.661101996232572e-06, "loss": 17.4681, "step": 7897 }, { "epoch": 0.14436909353464822, "grad_norm": 6.274607422961523, "learning_rate": 9.66099486413994e-06, "loss": 17.4182, "step": 7898 }, { "epoch": 0.14438737273109475, "grad_norm": 6.738978375188624, "learning_rate": 9.660887715710923e-06, "loss": 17.4482, "step": 7899 }, { "epoch": 0.14440565192754126, "grad_norm": 7.9156443709594555, "learning_rate": 9.6607805509459e-06, "loss": 18.0224, "step": 7900 }, { "epoch": 0.1444239311239878, "grad_norm": 6.811895160844932, "learning_rate": 9.660673369845247e-06, "loss": 17.6024, "step": 7901 }, { "epoch": 0.1444422103204343, "grad_norm": 6.721687375262563, "learning_rate": 9.660566172409339e-06, "loss": 17.6821, "step": 7902 }, { "epoch": 0.14446048951688084, "grad_norm": 7.7652829502734955, "learning_rate": 9.660458958638547e-06, "loss": 17.9376, "step": 7903 }, { "epoch": 0.14447876871332735, "grad_norm": 8.002762225554621, "learning_rate": 9.660351728533256e-06, "loss": 18.1968, "step": 7904 }, { "epoch": 0.14449704790977388, "grad_norm": 6.918270285826691, "learning_rate": 9.660244482093833e-06, "loss": 17.6539, "step": 7905 }, { "epoch": 0.14451532710622042, "grad_norm": 8.26402368486267, "learning_rate": 9.660137219320658e-06, "loss": 18.4072, "step": 7906 }, { "epoch": 0.14453360630266693, "grad_norm": 7.355595966344457, "learning_rate": 9.660029940214107e-06, "loss": 18.0655, "step": 7907 }, { "epoch": 0.14455188549911346, "grad_norm": 6.551187241907956, "learning_rate": 9.659922644774555e-06, "loss": 17.6814, "step": 7908 }, { "epoch": 0.14457016469555997, "grad_norm": 6.360395957826397, "learning_rate": 9.659815333002378e-06, "loss": 17.6062, "step": 7909 }, { "epoch": 0.1445884438920065, "grad_norm": 6.073180624066442, "learning_rate": 9.659708004897953e-06, "loss": 17.4986, "step": 7910 }, { "epoch": 0.14460672308845304, "grad_norm": 7.76183195591155, "learning_rate": 9.659600660461657e-06, "loss": 18.102, "step": 7911 }, { "epoch": 0.14462500228489955, "grad_norm": 8.64672075810826, "learning_rate": 9.659493299693862e-06, "loss": 17.8132, "step": 7912 }, { "epoch": 0.1446432814813461, "grad_norm": 6.318985302818086, "learning_rate": 9.65938592259495e-06, "loss": 17.5563, "step": 7913 }, { "epoch": 0.1446615606777926, "grad_norm": 6.467317018488494, "learning_rate": 9.659278529165295e-06, "loss": 17.3215, "step": 7914 }, { "epoch": 0.14467983987423913, "grad_norm": 6.232911434779717, "learning_rate": 9.659171119405272e-06, "loss": 17.7199, "step": 7915 }, { "epoch": 0.14469811907068567, "grad_norm": 6.687040938801778, "learning_rate": 9.659063693315259e-06, "loss": 17.5385, "step": 7916 }, { "epoch": 0.14471639826713217, "grad_norm": 7.415660908423637, "learning_rate": 9.658956250895631e-06, "loss": 17.9783, "step": 7917 }, { "epoch": 0.1447346774635787, "grad_norm": 7.98814069842345, "learning_rate": 9.658848792146767e-06, "loss": 18.0066, "step": 7918 }, { "epoch": 0.14475295666002522, "grad_norm": 7.844484202682719, "learning_rate": 9.658741317069042e-06, "loss": 17.8192, "step": 7919 }, { "epoch": 0.14477123585647175, "grad_norm": 6.42135409314389, "learning_rate": 9.65863382566283e-06, "loss": 17.5073, "step": 7920 }, { "epoch": 0.14478951505291826, "grad_norm": 7.155972754662482, "learning_rate": 9.658526317928515e-06, "loss": 17.7637, "step": 7921 }, { "epoch": 0.1448077942493648, "grad_norm": 7.226949915611771, "learning_rate": 9.658418793866468e-06, "loss": 17.7123, "step": 7922 }, { "epoch": 0.14482607344581133, "grad_norm": 6.366337750877327, "learning_rate": 9.658311253477066e-06, "loss": 17.652, "step": 7923 }, { "epoch": 0.14484435264225784, "grad_norm": 7.225688591426677, "learning_rate": 9.658203696760688e-06, "loss": 17.9749, "step": 7924 }, { "epoch": 0.14486263183870438, "grad_norm": 7.059189808583485, "learning_rate": 9.658096123717713e-06, "loss": 17.8764, "step": 7925 }, { "epoch": 0.14488091103515088, "grad_norm": 6.54725420805, "learning_rate": 9.65798853434851e-06, "loss": 17.533, "step": 7926 }, { "epoch": 0.14489919023159742, "grad_norm": 8.026008665268165, "learning_rate": 9.657880928653465e-06, "loss": 18.3109, "step": 7927 }, { "epoch": 0.14491746942804395, "grad_norm": 9.46751004244742, "learning_rate": 9.657773306632951e-06, "loss": 18.9406, "step": 7928 }, { "epoch": 0.14493574862449046, "grad_norm": 6.59519055352195, "learning_rate": 9.657665668287345e-06, "loss": 17.6014, "step": 7929 }, { "epoch": 0.144954027820937, "grad_norm": 7.99155854136064, "learning_rate": 9.657558013617028e-06, "loss": 18.1103, "step": 7930 }, { "epoch": 0.1449723070173835, "grad_norm": 6.067982080334245, "learning_rate": 9.657450342622371e-06, "loss": 17.3093, "step": 7931 }, { "epoch": 0.14499058621383004, "grad_norm": 6.81035303639027, "learning_rate": 9.657342655303756e-06, "loss": 17.6884, "step": 7932 }, { "epoch": 0.14500886541027658, "grad_norm": 7.270363925155759, "learning_rate": 9.657234951661558e-06, "loss": 17.8257, "step": 7933 }, { "epoch": 0.14502714460672309, "grad_norm": 5.945637240185427, "learning_rate": 9.657127231696157e-06, "loss": 17.4792, "step": 7934 }, { "epoch": 0.14504542380316962, "grad_norm": 6.75617973397292, "learning_rate": 9.657019495407929e-06, "loss": 17.554, "step": 7935 }, { "epoch": 0.14506370299961613, "grad_norm": 6.7116586245694405, "learning_rate": 9.65691174279725e-06, "loss": 17.6093, "step": 7936 }, { "epoch": 0.14508198219606266, "grad_norm": 6.064911541197413, "learning_rate": 9.656803973864502e-06, "loss": 17.1471, "step": 7937 }, { "epoch": 0.14510026139250917, "grad_norm": 8.827844568819508, "learning_rate": 9.656696188610059e-06, "loss": 18.6249, "step": 7938 }, { "epoch": 0.1451185405889557, "grad_norm": 6.6565575863675965, "learning_rate": 9.656588387034301e-06, "loss": 17.5621, "step": 7939 }, { "epoch": 0.14513681978540224, "grad_norm": 6.424663743895477, "learning_rate": 9.656480569137602e-06, "loss": 17.4067, "step": 7940 }, { "epoch": 0.14515509898184875, "grad_norm": 7.788567478831424, "learning_rate": 9.656372734920345e-06, "loss": 18.0129, "step": 7941 }, { "epoch": 0.1451733781782953, "grad_norm": 7.143982819053273, "learning_rate": 9.656264884382905e-06, "loss": 17.9295, "step": 7942 }, { "epoch": 0.1451916573747418, "grad_norm": 9.235043443693883, "learning_rate": 9.65615701752566e-06, "loss": 17.9051, "step": 7943 }, { "epoch": 0.14520993657118833, "grad_norm": 6.128521553350904, "learning_rate": 9.65604913434899e-06, "loss": 17.4178, "step": 7944 }, { "epoch": 0.14522821576763487, "grad_norm": 6.962582465150469, "learning_rate": 9.655941234853272e-06, "loss": 17.841, "step": 7945 }, { "epoch": 0.14524649496408137, "grad_norm": 6.385896950318575, "learning_rate": 9.655833319038883e-06, "loss": 17.4645, "step": 7946 }, { "epoch": 0.1452647741605279, "grad_norm": 7.562271523019747, "learning_rate": 9.655725386906202e-06, "loss": 18.0817, "step": 7947 }, { "epoch": 0.14528305335697442, "grad_norm": 7.199204285470184, "learning_rate": 9.655617438455608e-06, "loss": 17.69, "step": 7948 }, { "epoch": 0.14530133255342095, "grad_norm": 6.840929518434979, "learning_rate": 9.655509473687479e-06, "loss": 17.4528, "step": 7949 }, { "epoch": 0.1453196117498675, "grad_norm": 6.711427011156019, "learning_rate": 9.655401492602192e-06, "loss": 17.4159, "step": 7950 }, { "epoch": 0.145337890946314, "grad_norm": 5.965039206783731, "learning_rate": 9.655293495200128e-06, "loss": 17.2832, "step": 7951 }, { "epoch": 0.14535617014276053, "grad_norm": 7.310858054849152, "learning_rate": 9.655185481481663e-06, "loss": 17.6517, "step": 7952 }, { "epoch": 0.14537444933920704, "grad_norm": 6.977513673077216, "learning_rate": 9.655077451447179e-06, "loss": 17.6552, "step": 7953 }, { "epoch": 0.14539272853565358, "grad_norm": 5.821779274738296, "learning_rate": 9.654969405097053e-06, "loss": 17.3532, "step": 7954 }, { "epoch": 0.14541100773210008, "grad_norm": 6.758713108971858, "learning_rate": 9.654861342431661e-06, "loss": 17.5775, "step": 7955 }, { "epoch": 0.14542928692854662, "grad_norm": 7.863685222010629, "learning_rate": 9.654753263451385e-06, "loss": 17.8642, "step": 7956 }, { "epoch": 0.14544756612499316, "grad_norm": 9.153913203782722, "learning_rate": 9.654645168156601e-06, "loss": 18.5971, "step": 7957 }, { "epoch": 0.14546584532143966, "grad_norm": 6.745258601713347, "learning_rate": 9.654537056547691e-06, "loss": 17.6042, "step": 7958 }, { "epoch": 0.1454841245178862, "grad_norm": 5.9247000548100734, "learning_rate": 9.654428928625033e-06, "loss": 17.271, "step": 7959 }, { "epoch": 0.1455024037143327, "grad_norm": 7.441009387067109, "learning_rate": 9.654320784389004e-06, "loss": 17.9059, "step": 7960 }, { "epoch": 0.14552068291077924, "grad_norm": 6.891244859298569, "learning_rate": 9.654212623839985e-06, "loss": 18.0463, "step": 7961 }, { "epoch": 0.14553896210722578, "grad_norm": 6.297664198439215, "learning_rate": 9.654104446978357e-06, "loss": 17.7346, "step": 7962 }, { "epoch": 0.14555724130367229, "grad_norm": 8.043669055116208, "learning_rate": 9.653996253804493e-06, "loss": 17.9576, "step": 7963 }, { "epoch": 0.14557552050011882, "grad_norm": 7.57704490359053, "learning_rate": 9.653888044318778e-06, "loss": 18.1225, "step": 7964 }, { "epoch": 0.14559379969656533, "grad_norm": 7.788256163236495, "learning_rate": 9.65377981852159e-06, "loss": 17.9134, "step": 7965 }, { "epoch": 0.14561207889301186, "grad_norm": 6.710542169959897, "learning_rate": 9.653671576413306e-06, "loss": 17.732, "step": 7966 }, { "epoch": 0.1456303580894584, "grad_norm": 6.024146392515028, "learning_rate": 9.653563317994307e-06, "loss": 17.4358, "step": 7967 }, { "epoch": 0.1456486372859049, "grad_norm": 6.633504266152885, "learning_rate": 9.653455043264974e-06, "loss": 17.4505, "step": 7968 }, { "epoch": 0.14566691648235144, "grad_norm": 6.211227687837813, "learning_rate": 9.653346752225683e-06, "loss": 17.4499, "step": 7969 }, { "epoch": 0.14568519567879795, "grad_norm": 8.400148275059234, "learning_rate": 9.653238444876817e-06, "loss": 18.3171, "step": 7970 }, { "epoch": 0.1457034748752445, "grad_norm": 7.739331881310937, "learning_rate": 9.653130121218754e-06, "loss": 18.3042, "step": 7971 }, { "epoch": 0.145721754071691, "grad_norm": 7.955012682984678, "learning_rate": 9.653021781251872e-06, "loss": 17.9062, "step": 7972 }, { "epoch": 0.14574003326813753, "grad_norm": 6.259595286305838, "learning_rate": 9.652913424976553e-06, "loss": 17.4814, "step": 7973 }, { "epoch": 0.14575831246458407, "grad_norm": 6.078297879047728, "learning_rate": 9.652805052393178e-06, "loss": 17.4524, "step": 7974 }, { "epoch": 0.14577659166103057, "grad_norm": 9.375932177975441, "learning_rate": 9.652696663502123e-06, "loss": 18.5682, "step": 7975 }, { "epoch": 0.1457948708574771, "grad_norm": 6.257526122207114, "learning_rate": 9.65258825830377e-06, "loss": 17.4044, "step": 7976 }, { "epoch": 0.14581315005392362, "grad_norm": 7.945139211966594, "learning_rate": 9.652479836798501e-06, "loss": 18.2119, "step": 7977 }, { "epoch": 0.14583142925037015, "grad_norm": 7.520533804922809, "learning_rate": 9.65237139898669e-06, "loss": 17.8324, "step": 7978 }, { "epoch": 0.1458497084468167, "grad_norm": 7.5445427347964, "learning_rate": 9.652262944868724e-06, "loss": 18.0339, "step": 7979 }, { "epoch": 0.1458679876432632, "grad_norm": 5.744613699632315, "learning_rate": 9.65215447444498e-06, "loss": 17.1529, "step": 7980 }, { "epoch": 0.14588626683970973, "grad_norm": 7.102725612497753, "learning_rate": 9.652045987715838e-06, "loss": 17.7757, "step": 7981 }, { "epoch": 0.14590454603615624, "grad_norm": 9.217413539931432, "learning_rate": 9.651937484681678e-06, "loss": 18.5535, "step": 7982 }, { "epoch": 0.14592282523260278, "grad_norm": 7.146597866539307, "learning_rate": 9.651828965342882e-06, "loss": 17.6874, "step": 7983 }, { "epoch": 0.1459411044290493, "grad_norm": 5.622544955129633, "learning_rate": 9.651720429699827e-06, "loss": 17.1046, "step": 7984 }, { "epoch": 0.14595938362549582, "grad_norm": 7.543782899563225, "learning_rate": 9.651611877752897e-06, "loss": 17.7099, "step": 7985 }, { "epoch": 0.14597766282194236, "grad_norm": 7.342211067682582, "learning_rate": 9.65150330950247e-06, "loss": 17.8767, "step": 7986 }, { "epoch": 0.14599594201838886, "grad_norm": 7.435391159299244, "learning_rate": 9.651394724948929e-06, "loss": 17.9265, "step": 7987 }, { "epoch": 0.1460142212148354, "grad_norm": 6.28794682429356, "learning_rate": 9.651286124092653e-06, "loss": 17.5435, "step": 7988 }, { "epoch": 0.1460325004112819, "grad_norm": 6.252197394803422, "learning_rate": 9.651177506934022e-06, "loss": 17.2206, "step": 7989 }, { "epoch": 0.14605077960772844, "grad_norm": 7.09374348651782, "learning_rate": 9.651068873473417e-06, "loss": 17.8473, "step": 7990 }, { "epoch": 0.14606905880417498, "grad_norm": 7.013619760961217, "learning_rate": 9.65096022371122e-06, "loss": 17.5454, "step": 7991 }, { "epoch": 0.14608733800062149, "grad_norm": 7.179235417174337, "learning_rate": 9.65085155764781e-06, "loss": 17.66, "step": 7992 }, { "epoch": 0.14610561719706802, "grad_norm": 5.4883918065536434, "learning_rate": 9.65074287528357e-06, "loss": 16.9709, "step": 7993 }, { "epoch": 0.14612389639351453, "grad_norm": 8.699244438136956, "learning_rate": 9.65063417661888e-06, "loss": 18.3041, "step": 7994 }, { "epoch": 0.14614217558996107, "grad_norm": 7.01955896623706, "learning_rate": 9.65052546165412e-06, "loss": 17.5045, "step": 7995 }, { "epoch": 0.1461604547864076, "grad_norm": 7.715833247104848, "learning_rate": 9.650416730389672e-06, "loss": 18.2539, "step": 7996 }, { "epoch": 0.1461787339828541, "grad_norm": 7.261767826837706, "learning_rate": 9.650307982825917e-06, "loss": 17.5952, "step": 7997 }, { "epoch": 0.14619701317930064, "grad_norm": 6.385983497206502, "learning_rate": 9.650199218963236e-06, "loss": 17.6332, "step": 7998 }, { "epoch": 0.14621529237574715, "grad_norm": 8.344318378430048, "learning_rate": 9.650090438802012e-06, "loss": 18.0755, "step": 7999 }, { "epoch": 0.1462335715721937, "grad_norm": 6.472024326689775, "learning_rate": 9.649981642342621e-06, "loss": 17.4189, "step": 8000 }, { "epoch": 0.14625185076864022, "grad_norm": 6.715048457193912, "learning_rate": 9.64987282958545e-06, "loss": 17.6987, "step": 8001 }, { "epoch": 0.14627012996508673, "grad_norm": 6.13079996475156, "learning_rate": 9.649764000530878e-06, "loss": 17.2704, "step": 8002 }, { "epoch": 0.14628840916153327, "grad_norm": 6.671879485010782, "learning_rate": 9.649655155179287e-06, "loss": 17.36, "step": 8003 }, { "epoch": 0.14630668835797978, "grad_norm": 7.370770516624002, "learning_rate": 9.649546293531057e-06, "loss": 17.542, "step": 8004 }, { "epoch": 0.1463249675544263, "grad_norm": 8.079342234334176, "learning_rate": 9.64943741558657e-06, "loss": 18.2899, "step": 8005 }, { "epoch": 0.14634324675087282, "grad_norm": 6.903482231699645, "learning_rate": 9.64932852134621e-06, "loss": 17.8285, "step": 8006 }, { "epoch": 0.14636152594731935, "grad_norm": 10.070179892659032, "learning_rate": 9.649219610810359e-06, "loss": 18.9447, "step": 8007 }, { "epoch": 0.1463798051437659, "grad_norm": 7.730014505390262, "learning_rate": 9.649110683979394e-06, "loss": 18.2886, "step": 8008 }, { "epoch": 0.1463980843402124, "grad_norm": 8.30281918365547, "learning_rate": 9.6490017408537e-06, "loss": 18.4275, "step": 8009 }, { "epoch": 0.14641636353665893, "grad_norm": 7.1197873864420895, "learning_rate": 9.648892781433657e-06, "loss": 17.7806, "step": 8010 }, { "epoch": 0.14643464273310544, "grad_norm": 8.427168269867161, "learning_rate": 9.64878380571965e-06, "loss": 18.1895, "step": 8011 }, { "epoch": 0.14645292192955198, "grad_norm": 6.783781928597438, "learning_rate": 9.648674813712059e-06, "loss": 17.9056, "step": 8012 }, { "epoch": 0.1464712011259985, "grad_norm": 6.678274522360001, "learning_rate": 9.648565805411265e-06, "loss": 17.4284, "step": 8013 }, { "epoch": 0.14648948032244502, "grad_norm": 7.8352790288474585, "learning_rate": 9.648456780817651e-06, "loss": 18.2195, "step": 8014 }, { "epoch": 0.14650775951889156, "grad_norm": 8.09375938615011, "learning_rate": 9.648347739931603e-06, "loss": 17.9966, "step": 8015 }, { "epoch": 0.14652603871533806, "grad_norm": 7.659916058842947, "learning_rate": 9.648238682753497e-06, "loss": 17.8873, "step": 8016 }, { "epoch": 0.1465443179117846, "grad_norm": 5.915365313583533, "learning_rate": 9.648129609283716e-06, "loss": 17.3144, "step": 8017 }, { "epoch": 0.14656259710823114, "grad_norm": 7.193445790671711, "learning_rate": 9.648020519522647e-06, "loss": 17.7632, "step": 8018 }, { "epoch": 0.14658087630467764, "grad_norm": 7.0641124489645, "learning_rate": 9.647911413470668e-06, "loss": 17.9136, "step": 8019 }, { "epoch": 0.14659915550112418, "grad_norm": 5.959791630226095, "learning_rate": 9.647802291128163e-06, "loss": 17.2778, "step": 8020 }, { "epoch": 0.1466174346975707, "grad_norm": 6.2009743065833485, "learning_rate": 9.647693152495514e-06, "loss": 17.4677, "step": 8021 }, { "epoch": 0.14663571389401722, "grad_norm": 7.8164225640752045, "learning_rate": 9.647583997573105e-06, "loss": 18.0936, "step": 8022 }, { "epoch": 0.14665399309046373, "grad_norm": 5.6426609914409065, "learning_rate": 9.647474826361316e-06, "loss": 17.1358, "step": 8023 }, { "epoch": 0.14667227228691027, "grad_norm": 7.281189933670641, "learning_rate": 9.64736563886053e-06, "loss": 18.0002, "step": 8024 }, { "epoch": 0.1466905514833568, "grad_norm": 7.156884754375371, "learning_rate": 9.647256435071133e-06, "loss": 17.9063, "step": 8025 }, { "epoch": 0.1467088306798033, "grad_norm": 5.953496097128827, "learning_rate": 9.647147214993504e-06, "loss": 17.1232, "step": 8026 }, { "epoch": 0.14672710987624984, "grad_norm": 7.920627054244241, "learning_rate": 9.647037978628029e-06, "loss": 17.8285, "step": 8027 }, { "epoch": 0.14674538907269635, "grad_norm": 10.69892625842698, "learning_rate": 9.646928725975087e-06, "loss": 18.6059, "step": 8028 }, { "epoch": 0.1467636682691429, "grad_norm": 6.3435637466396715, "learning_rate": 9.646819457035064e-06, "loss": 17.3268, "step": 8029 }, { "epoch": 0.14678194746558942, "grad_norm": 7.240777143096906, "learning_rate": 9.646710171808342e-06, "loss": 18.0841, "step": 8030 }, { "epoch": 0.14680022666203593, "grad_norm": 8.367751790399302, "learning_rate": 9.646600870295305e-06, "loss": 18.0374, "step": 8031 }, { "epoch": 0.14681850585848247, "grad_norm": 6.4560850582753275, "learning_rate": 9.646491552496336e-06, "loss": 17.5513, "step": 8032 }, { "epoch": 0.14683678505492898, "grad_norm": 7.321443836457617, "learning_rate": 9.646382218411813e-06, "loss": 18.0615, "step": 8033 }, { "epoch": 0.1468550642513755, "grad_norm": 6.3652537476119555, "learning_rate": 9.646272868042129e-06, "loss": 17.4165, "step": 8034 }, { "epoch": 0.14687334344782205, "grad_norm": 7.594431029046291, "learning_rate": 9.646163501387658e-06, "loss": 17.8259, "step": 8035 }, { "epoch": 0.14689162264426855, "grad_norm": 7.19503817395823, "learning_rate": 9.646054118448787e-06, "loss": 17.8467, "step": 8036 }, { "epoch": 0.1469099018407151, "grad_norm": 10.580051274042303, "learning_rate": 9.645944719225902e-06, "loss": 18.1914, "step": 8037 }, { "epoch": 0.1469281810371616, "grad_norm": 8.852314628875943, "learning_rate": 9.645835303719382e-06, "loss": 18.2564, "step": 8038 }, { "epoch": 0.14694646023360813, "grad_norm": 6.427733925668023, "learning_rate": 9.645725871929614e-06, "loss": 17.3538, "step": 8039 }, { "epoch": 0.14696473943005464, "grad_norm": 7.311157127937562, "learning_rate": 9.645616423856978e-06, "loss": 17.7997, "step": 8040 }, { "epoch": 0.14698301862650118, "grad_norm": 6.070143885303128, "learning_rate": 9.64550695950186e-06, "loss": 17.2713, "step": 8041 }, { "epoch": 0.1470012978229477, "grad_norm": 5.817498232664236, "learning_rate": 9.645397478864645e-06, "loss": 17.4812, "step": 8042 }, { "epoch": 0.14701957701939422, "grad_norm": 6.67491314100426, "learning_rate": 9.645287981945712e-06, "loss": 17.652, "step": 8043 }, { "epoch": 0.14703785621584076, "grad_norm": 7.288494360246087, "learning_rate": 9.64517846874545e-06, "loss": 18.0519, "step": 8044 }, { "epoch": 0.14705613541228726, "grad_norm": 7.709262361894076, "learning_rate": 9.64506893926424e-06, "loss": 17.821, "step": 8045 }, { "epoch": 0.1470744146087338, "grad_norm": 7.2491534881899415, "learning_rate": 9.644959393502467e-06, "loss": 17.957, "step": 8046 }, { "epoch": 0.14709269380518034, "grad_norm": 8.27859495332456, "learning_rate": 9.644849831460513e-06, "loss": 18.3226, "step": 8047 }, { "epoch": 0.14711097300162684, "grad_norm": 6.229448535308603, "learning_rate": 9.644740253138765e-06, "loss": 17.3666, "step": 8048 }, { "epoch": 0.14712925219807338, "grad_norm": 8.281485590991695, "learning_rate": 9.644630658537604e-06, "loss": 18.089, "step": 8049 }, { "epoch": 0.1471475313945199, "grad_norm": 7.260511924938508, "learning_rate": 9.644521047657416e-06, "loss": 17.8782, "step": 8050 }, { "epoch": 0.14716581059096642, "grad_norm": 8.075624424086456, "learning_rate": 9.644411420498585e-06, "loss": 17.9388, "step": 8051 }, { "epoch": 0.14718408978741296, "grad_norm": 7.205715603077248, "learning_rate": 9.644301777061495e-06, "loss": 18.0204, "step": 8052 }, { "epoch": 0.14720236898385947, "grad_norm": 7.077535799197512, "learning_rate": 9.64419211734653e-06, "loss": 17.7748, "step": 8053 }, { "epoch": 0.147220648180306, "grad_norm": 9.997143239685304, "learning_rate": 9.644082441354075e-06, "loss": 17.8055, "step": 8054 }, { "epoch": 0.1472389273767525, "grad_norm": 8.951299796489199, "learning_rate": 9.643972749084513e-06, "loss": 18.4639, "step": 8055 }, { "epoch": 0.14725720657319905, "grad_norm": 6.512609840491759, "learning_rate": 9.643863040538231e-06, "loss": 17.4196, "step": 8056 }, { "epoch": 0.14727548576964555, "grad_norm": 9.13164669058537, "learning_rate": 9.64375331571561e-06, "loss": 17.8644, "step": 8057 }, { "epoch": 0.1472937649660921, "grad_norm": 6.442756046873929, "learning_rate": 9.643643574617039e-06, "loss": 17.5869, "step": 8058 }, { "epoch": 0.14731204416253862, "grad_norm": 6.495695349937012, "learning_rate": 9.6435338172429e-06, "loss": 17.464, "step": 8059 }, { "epoch": 0.14733032335898513, "grad_norm": 7.259579718403372, "learning_rate": 9.643424043593576e-06, "loss": 17.7688, "step": 8060 }, { "epoch": 0.14734860255543167, "grad_norm": 7.712910100182315, "learning_rate": 9.643314253669455e-06, "loss": 17.8725, "step": 8061 }, { "epoch": 0.14736688175187818, "grad_norm": 7.739286195484188, "learning_rate": 9.643204447470922e-06, "loss": 17.7906, "step": 8062 }, { "epoch": 0.1473851609483247, "grad_norm": 7.248123211068903, "learning_rate": 9.643094624998357e-06, "loss": 17.7043, "step": 8063 }, { "epoch": 0.14740344014477125, "grad_norm": 7.188458907414833, "learning_rate": 9.64298478625215e-06, "loss": 17.6625, "step": 8064 }, { "epoch": 0.14742171934121776, "grad_norm": 7.148484029794683, "learning_rate": 9.642874931232684e-06, "loss": 17.8504, "step": 8065 }, { "epoch": 0.1474399985376643, "grad_norm": 7.387959283440316, "learning_rate": 9.642765059940344e-06, "loss": 18.3822, "step": 8066 }, { "epoch": 0.1474582777341108, "grad_norm": 8.856985975380033, "learning_rate": 9.642655172375516e-06, "loss": 18.7405, "step": 8067 }, { "epoch": 0.14747655693055733, "grad_norm": 7.766467161611203, "learning_rate": 9.642545268538585e-06, "loss": 17.7352, "step": 8068 }, { "epoch": 0.14749483612700387, "grad_norm": 6.911400134219339, "learning_rate": 9.642435348429935e-06, "loss": 17.7442, "step": 8069 }, { "epoch": 0.14751311532345038, "grad_norm": 8.535640558915466, "learning_rate": 9.642325412049952e-06, "loss": 18.199, "step": 8070 }, { "epoch": 0.1475313945198969, "grad_norm": 8.537335326600212, "learning_rate": 9.64221545939902e-06, "loss": 17.82, "step": 8071 }, { "epoch": 0.14754967371634342, "grad_norm": 6.633873060369528, "learning_rate": 9.642105490477527e-06, "loss": 17.7843, "step": 8072 }, { "epoch": 0.14756795291278996, "grad_norm": 6.4278340117422905, "learning_rate": 9.641995505285858e-06, "loss": 17.3694, "step": 8073 }, { "epoch": 0.14758623210923646, "grad_norm": 7.069412562342773, "learning_rate": 9.641885503824395e-06, "loss": 18.1548, "step": 8074 }, { "epoch": 0.147604511305683, "grad_norm": 6.767035618080706, "learning_rate": 9.64177548609353e-06, "loss": 17.9882, "step": 8075 }, { "epoch": 0.14762279050212954, "grad_norm": 7.903839015408739, "learning_rate": 9.641665452093641e-06, "loss": 18.1572, "step": 8076 }, { "epoch": 0.14764106969857604, "grad_norm": 7.2825054707225405, "learning_rate": 9.641555401825118e-06, "loss": 17.8129, "step": 8077 }, { "epoch": 0.14765934889502258, "grad_norm": 6.761561047503617, "learning_rate": 9.641445335288346e-06, "loss": 17.7295, "step": 8078 }, { "epoch": 0.1476776280914691, "grad_norm": 6.618816072053667, "learning_rate": 9.641335252483712e-06, "loss": 17.4507, "step": 8079 }, { "epoch": 0.14769590728791562, "grad_norm": 6.487847127290106, "learning_rate": 9.6412251534116e-06, "loss": 17.3601, "step": 8080 }, { "epoch": 0.14771418648436216, "grad_norm": 6.9250736911951725, "learning_rate": 9.641115038072397e-06, "loss": 17.8905, "step": 8081 }, { "epoch": 0.14773246568080867, "grad_norm": 8.476983225022746, "learning_rate": 9.641004906466488e-06, "loss": 18.3762, "step": 8082 }, { "epoch": 0.1477507448772552, "grad_norm": 8.761072999506217, "learning_rate": 9.64089475859426e-06, "loss": 18.624, "step": 8083 }, { "epoch": 0.1477690240737017, "grad_norm": 6.328830632630538, "learning_rate": 9.6407845944561e-06, "loss": 17.3398, "step": 8084 }, { "epoch": 0.14778730327014825, "grad_norm": 8.9338263657113, "learning_rate": 9.640674414052391e-06, "loss": 18.9664, "step": 8085 }, { "epoch": 0.14780558246659478, "grad_norm": 7.661138891664235, "learning_rate": 9.640564217383522e-06, "loss": 18.3349, "step": 8086 }, { "epoch": 0.1478238616630413, "grad_norm": 7.187970700208174, "learning_rate": 9.640454004449877e-06, "loss": 17.8618, "step": 8087 }, { "epoch": 0.14784214085948783, "grad_norm": 5.871236125636451, "learning_rate": 9.640343775251844e-06, "loss": 17.2336, "step": 8088 }, { "epoch": 0.14786042005593433, "grad_norm": 7.393317643417872, "learning_rate": 9.640233529789806e-06, "loss": 17.8336, "step": 8089 }, { "epoch": 0.14787869925238087, "grad_norm": 9.451071523509363, "learning_rate": 9.640123268064156e-06, "loss": 18.0403, "step": 8090 }, { "epoch": 0.14789697844882738, "grad_norm": 6.5374182020368, "learning_rate": 9.640012990075274e-06, "loss": 17.6703, "step": 8091 }, { "epoch": 0.1479152576452739, "grad_norm": 8.233324766017134, "learning_rate": 9.63990269582355e-06, "loss": 17.9709, "step": 8092 }, { "epoch": 0.14793353684172045, "grad_norm": 5.731050775772712, "learning_rate": 9.63979238530937e-06, "loss": 17.242, "step": 8093 }, { "epoch": 0.14795181603816696, "grad_norm": 6.218482515209717, "learning_rate": 9.63968205853312e-06, "loss": 17.3829, "step": 8094 }, { "epoch": 0.1479700952346135, "grad_norm": 6.736553892820769, "learning_rate": 9.639571715495189e-06, "loss": 17.6474, "step": 8095 }, { "epoch": 0.14798837443106, "grad_norm": 7.513901910688545, "learning_rate": 9.639461356195958e-06, "loss": 18.1575, "step": 8096 }, { "epoch": 0.14800665362750653, "grad_norm": 10.488684500822124, "learning_rate": 9.63935098063582e-06, "loss": 17.6842, "step": 8097 }, { "epoch": 0.14802493282395307, "grad_norm": 8.081786160838991, "learning_rate": 9.63924058881516e-06, "loss": 18.2097, "step": 8098 }, { "epoch": 0.14804321202039958, "grad_norm": 8.68145308215769, "learning_rate": 9.639130180734362e-06, "loss": 18.1371, "step": 8099 }, { "epoch": 0.14806149121684611, "grad_norm": 8.067734968278177, "learning_rate": 9.639019756393817e-06, "loss": 17.8399, "step": 8100 }, { "epoch": 0.14807977041329262, "grad_norm": 6.382101361188045, "learning_rate": 9.63890931579391e-06, "loss": 17.3079, "step": 8101 }, { "epoch": 0.14809804960973916, "grad_norm": 5.183982865642566, "learning_rate": 9.638798858935028e-06, "loss": 16.8802, "step": 8102 }, { "epoch": 0.1481163288061857, "grad_norm": 6.6951633909441535, "learning_rate": 9.638688385817558e-06, "loss": 17.7609, "step": 8103 }, { "epoch": 0.1481346080026322, "grad_norm": 6.194876638611268, "learning_rate": 9.63857789644189e-06, "loss": 17.3454, "step": 8104 }, { "epoch": 0.14815288719907874, "grad_norm": 5.317406037764646, "learning_rate": 9.638467390808405e-06, "loss": 17.1418, "step": 8105 }, { "epoch": 0.14817116639552524, "grad_norm": 6.895413692636472, "learning_rate": 9.638356868917497e-06, "loss": 17.577, "step": 8106 }, { "epoch": 0.14818944559197178, "grad_norm": 8.008443410440348, "learning_rate": 9.638246330769552e-06, "loss": 18.035, "step": 8107 }, { "epoch": 0.1482077247884183, "grad_norm": 5.74120716951446, "learning_rate": 9.638135776364954e-06, "loss": 17.203, "step": 8108 }, { "epoch": 0.14822600398486482, "grad_norm": 8.54913912805209, "learning_rate": 9.638025205704094e-06, "loss": 18.1013, "step": 8109 }, { "epoch": 0.14824428318131136, "grad_norm": 8.312580974732796, "learning_rate": 9.637914618787356e-06, "loss": 18.1672, "step": 8110 }, { "epoch": 0.14826256237775787, "grad_norm": 6.738350444607828, "learning_rate": 9.63780401561513e-06, "loss": 17.7801, "step": 8111 }, { "epoch": 0.1482808415742044, "grad_norm": 7.886413767494075, "learning_rate": 9.637693396187806e-06, "loss": 17.9368, "step": 8112 }, { "epoch": 0.1482991207706509, "grad_norm": 7.875815936487451, "learning_rate": 9.637582760505767e-06, "loss": 17.3937, "step": 8113 }, { "epoch": 0.14831739996709745, "grad_norm": 6.596152726567714, "learning_rate": 9.637472108569404e-06, "loss": 17.5027, "step": 8114 }, { "epoch": 0.14833567916354398, "grad_norm": 6.710516305595572, "learning_rate": 9.637361440379102e-06, "loss": 17.8169, "step": 8115 }, { "epoch": 0.1483539583599905, "grad_norm": 6.928265849809754, "learning_rate": 9.637250755935252e-06, "loss": 17.3836, "step": 8116 }, { "epoch": 0.14837223755643703, "grad_norm": 6.500901711677311, "learning_rate": 9.63714005523824e-06, "loss": 17.4885, "step": 8117 }, { "epoch": 0.14839051675288353, "grad_norm": 7.405538827349513, "learning_rate": 9.637029338288454e-06, "loss": 17.837, "step": 8118 }, { "epoch": 0.14840879594933007, "grad_norm": 7.252059624928057, "learning_rate": 9.636918605086283e-06, "loss": 17.5855, "step": 8119 }, { "epoch": 0.1484270751457766, "grad_norm": 6.926791768096578, "learning_rate": 9.636807855632115e-06, "loss": 17.6411, "step": 8120 }, { "epoch": 0.1484453543422231, "grad_norm": 6.5719604807225895, "learning_rate": 9.636697089926338e-06, "loss": 17.5774, "step": 8121 }, { "epoch": 0.14846363353866965, "grad_norm": 6.662271402853505, "learning_rate": 9.636586307969338e-06, "loss": 17.7661, "step": 8122 }, { "epoch": 0.14848191273511616, "grad_norm": 7.132505202028377, "learning_rate": 9.636475509761507e-06, "loss": 17.6119, "step": 8123 }, { "epoch": 0.1485001919315627, "grad_norm": 8.094454678992484, "learning_rate": 9.636364695303234e-06, "loss": 17.5317, "step": 8124 }, { "epoch": 0.1485184711280092, "grad_norm": 7.831661732240495, "learning_rate": 9.6362538645949e-06, "loss": 17.7834, "step": 8125 }, { "epoch": 0.14853675032445574, "grad_norm": 6.011204911661293, "learning_rate": 9.636143017636901e-06, "loss": 17.1371, "step": 8126 }, { "epoch": 0.14855502952090227, "grad_norm": 7.7689813298236645, "learning_rate": 9.636032154429624e-06, "loss": 18.2324, "step": 8127 }, { "epoch": 0.14857330871734878, "grad_norm": 6.4227869864023805, "learning_rate": 9.635921274973457e-06, "loss": 17.782, "step": 8128 }, { "epoch": 0.14859158791379531, "grad_norm": 7.21488812752111, "learning_rate": 9.635810379268786e-06, "loss": 17.543, "step": 8129 }, { "epoch": 0.14860986711024182, "grad_norm": 7.942197203982288, "learning_rate": 9.635699467316002e-06, "loss": 17.9881, "step": 8130 }, { "epoch": 0.14862814630668836, "grad_norm": 8.764202749204488, "learning_rate": 9.635588539115495e-06, "loss": 18.4816, "step": 8131 }, { "epoch": 0.1486464255031349, "grad_norm": 7.777306134782962, "learning_rate": 9.635477594667653e-06, "loss": 18.1013, "step": 8132 }, { "epoch": 0.1486647046995814, "grad_norm": 7.83180863384052, "learning_rate": 9.635366633972863e-06, "loss": 17.3537, "step": 8133 }, { "epoch": 0.14868298389602794, "grad_norm": 6.262583354547982, "learning_rate": 9.635255657031515e-06, "loss": 17.5054, "step": 8134 }, { "epoch": 0.14870126309247444, "grad_norm": 7.658041012486796, "learning_rate": 9.635144663843999e-06, "loss": 17.9531, "step": 8135 }, { "epoch": 0.14871954228892098, "grad_norm": 6.808612918804337, "learning_rate": 9.635033654410703e-06, "loss": 17.7325, "step": 8136 }, { "epoch": 0.14873782148536752, "grad_norm": 7.258150622509804, "learning_rate": 9.634922628732015e-06, "loss": 17.7368, "step": 8137 }, { "epoch": 0.14875610068181402, "grad_norm": 6.9982574431914575, "learning_rate": 9.634811586808327e-06, "loss": 17.6144, "step": 8138 }, { "epoch": 0.14877437987826056, "grad_norm": 7.857884053169739, "learning_rate": 9.634700528640026e-06, "loss": 17.9164, "step": 8139 }, { "epoch": 0.14879265907470707, "grad_norm": 7.452730871277512, "learning_rate": 9.634589454227502e-06, "loss": 17.6904, "step": 8140 }, { "epoch": 0.1488109382711536, "grad_norm": 6.9768223589685, "learning_rate": 9.634478363571144e-06, "loss": 17.6711, "step": 8141 }, { "epoch": 0.1488292174676001, "grad_norm": 6.947093534664517, "learning_rate": 9.634367256671342e-06, "loss": 17.8603, "step": 8142 }, { "epoch": 0.14884749666404665, "grad_norm": 8.065395022833629, "learning_rate": 9.634256133528483e-06, "loss": 17.9711, "step": 8143 }, { "epoch": 0.14886577586049318, "grad_norm": 5.587161523282454, "learning_rate": 9.63414499414296e-06, "loss": 17.0566, "step": 8144 }, { "epoch": 0.1488840550569397, "grad_norm": 6.629157279648962, "learning_rate": 9.634033838515162e-06, "loss": 17.5394, "step": 8145 }, { "epoch": 0.14890233425338623, "grad_norm": 9.91512398762502, "learning_rate": 9.633922666645475e-06, "loss": 17.7891, "step": 8146 }, { "epoch": 0.14892061344983273, "grad_norm": 6.5990876856195655, "learning_rate": 9.633811478534293e-06, "loss": 17.5892, "step": 8147 }, { "epoch": 0.14893889264627927, "grad_norm": 7.022881311193155, "learning_rate": 9.633700274182003e-06, "loss": 17.5351, "step": 8148 }, { "epoch": 0.1489571718427258, "grad_norm": 5.6144195274142055, "learning_rate": 9.633589053588997e-06, "loss": 16.9259, "step": 8149 }, { "epoch": 0.1489754510391723, "grad_norm": 7.738487347820152, "learning_rate": 9.63347781675566e-06, "loss": 18.1853, "step": 8150 }, { "epoch": 0.14899373023561885, "grad_norm": 6.193180519595679, "learning_rate": 9.63336656368239e-06, "loss": 17.5029, "step": 8151 }, { "epoch": 0.14901200943206536, "grad_norm": 7.092227173166322, "learning_rate": 9.633255294369569e-06, "loss": 17.9659, "step": 8152 }, { "epoch": 0.1490302886285119, "grad_norm": 6.745416215426979, "learning_rate": 9.63314400881759e-06, "loss": 17.657, "step": 8153 }, { "epoch": 0.14904856782495843, "grad_norm": 8.15380868776113, "learning_rate": 9.633032707026846e-06, "loss": 18.2057, "step": 8154 }, { "epoch": 0.14906684702140494, "grad_norm": 6.331248922332758, "learning_rate": 9.632921388997722e-06, "loss": 17.1826, "step": 8155 }, { "epoch": 0.14908512621785147, "grad_norm": 6.805012976161833, "learning_rate": 9.632810054730611e-06, "loss": 17.6045, "step": 8156 }, { "epoch": 0.14910340541429798, "grad_norm": 6.046630693939717, "learning_rate": 9.632698704225904e-06, "loss": 17.2938, "step": 8157 }, { "epoch": 0.14912168461074451, "grad_norm": 6.757383522376019, "learning_rate": 9.632587337483989e-06, "loss": 17.5262, "step": 8158 }, { "epoch": 0.14913996380719102, "grad_norm": 8.323600026726774, "learning_rate": 9.632475954505258e-06, "loss": 18.1282, "step": 8159 }, { "epoch": 0.14915824300363756, "grad_norm": 6.499193928467957, "learning_rate": 9.6323645552901e-06, "loss": 17.0955, "step": 8160 }, { "epoch": 0.1491765222000841, "grad_norm": 6.966078916831902, "learning_rate": 9.632253139838906e-06, "loss": 17.8794, "step": 8161 }, { "epoch": 0.1491948013965306, "grad_norm": 6.599636555603426, "learning_rate": 9.632141708152068e-06, "loss": 17.5405, "step": 8162 }, { "epoch": 0.14921308059297714, "grad_norm": 7.28404182257285, "learning_rate": 9.632030260229974e-06, "loss": 17.5913, "step": 8163 }, { "epoch": 0.14923135978942365, "grad_norm": 7.026371351499305, "learning_rate": 9.631918796073017e-06, "loss": 17.6872, "step": 8164 }, { "epoch": 0.14924963898587018, "grad_norm": 6.586875317455057, "learning_rate": 9.631807315681586e-06, "loss": 17.4985, "step": 8165 }, { "epoch": 0.14926791818231672, "grad_norm": 6.5463039923326, "learning_rate": 9.631695819056073e-06, "loss": 17.7064, "step": 8166 }, { "epoch": 0.14928619737876322, "grad_norm": 9.692044910483968, "learning_rate": 9.631584306196866e-06, "loss": 18.3559, "step": 8167 }, { "epoch": 0.14930447657520976, "grad_norm": 7.022885606869071, "learning_rate": 9.631472777104361e-06, "loss": 17.4101, "step": 8168 }, { "epoch": 0.14932275577165627, "grad_norm": 8.913693817095838, "learning_rate": 9.631361231778944e-06, "loss": 18.2285, "step": 8169 }, { "epoch": 0.1493410349681028, "grad_norm": 6.636074960057482, "learning_rate": 9.631249670221007e-06, "loss": 17.6323, "step": 8170 }, { "epoch": 0.14935931416454934, "grad_norm": 6.843279083385719, "learning_rate": 9.631138092430943e-06, "loss": 17.6723, "step": 8171 }, { "epoch": 0.14937759336099585, "grad_norm": 6.873244248974223, "learning_rate": 9.631026498409142e-06, "loss": 17.5206, "step": 8172 }, { "epoch": 0.14939587255744238, "grad_norm": 6.66895985156897, "learning_rate": 9.630914888155993e-06, "loss": 17.5482, "step": 8173 }, { "epoch": 0.1494141517538889, "grad_norm": 6.853114025017008, "learning_rate": 9.630803261671892e-06, "loss": 17.7752, "step": 8174 }, { "epoch": 0.14943243095033543, "grad_norm": 6.133412828735075, "learning_rate": 9.630691618957225e-06, "loss": 17.5157, "step": 8175 }, { "epoch": 0.14945071014678193, "grad_norm": 7.621202411677604, "learning_rate": 9.630579960012387e-06, "loss": 18.3171, "step": 8176 }, { "epoch": 0.14946898934322847, "grad_norm": 7.582676238264134, "learning_rate": 9.630468284837769e-06, "loss": 17.986, "step": 8177 }, { "epoch": 0.149487268539675, "grad_norm": 6.258409215946996, "learning_rate": 9.63035659343376e-06, "loss": 17.3532, "step": 8178 }, { "epoch": 0.1495055477361215, "grad_norm": 6.355855197434192, "learning_rate": 9.630244885800753e-06, "loss": 17.6792, "step": 8179 }, { "epoch": 0.14952382693256805, "grad_norm": 8.220841132045704, "learning_rate": 9.63013316193914e-06, "loss": 18.196, "step": 8180 }, { "epoch": 0.14954210612901456, "grad_norm": 6.636291930323637, "learning_rate": 9.630021421849311e-06, "loss": 17.5067, "step": 8181 }, { "epoch": 0.1495603853254611, "grad_norm": 7.5266465981644854, "learning_rate": 9.629909665531661e-06, "loss": 18.0338, "step": 8182 }, { "epoch": 0.14957866452190763, "grad_norm": 7.035010101666306, "learning_rate": 9.629797892986576e-06, "loss": 17.7137, "step": 8183 }, { "epoch": 0.14959694371835414, "grad_norm": 7.163355240729544, "learning_rate": 9.629686104214453e-06, "loss": 17.8504, "step": 8184 }, { "epoch": 0.14961522291480067, "grad_norm": 7.1227862610442925, "learning_rate": 9.629574299215682e-06, "loss": 17.8507, "step": 8185 }, { "epoch": 0.14963350211124718, "grad_norm": 7.485342666547704, "learning_rate": 9.629462477990656e-06, "loss": 18.1443, "step": 8186 }, { "epoch": 0.14965178130769372, "grad_norm": 8.198642072686255, "learning_rate": 9.629350640539763e-06, "loss": 18.1238, "step": 8187 }, { "epoch": 0.14967006050414025, "grad_norm": 6.974602313031887, "learning_rate": 9.629238786863401e-06, "loss": 17.547, "step": 8188 }, { "epoch": 0.14968833970058676, "grad_norm": 7.12877973991162, "learning_rate": 9.629126916961958e-06, "loss": 17.8723, "step": 8189 }, { "epoch": 0.1497066188970333, "grad_norm": 7.462106769459328, "learning_rate": 9.629015030835824e-06, "loss": 17.7146, "step": 8190 }, { "epoch": 0.1497248980934798, "grad_norm": 6.839041078834273, "learning_rate": 9.628903128485396e-06, "loss": 17.96, "step": 8191 }, { "epoch": 0.14974317728992634, "grad_norm": 6.82052383103971, "learning_rate": 9.628791209911063e-06, "loss": 17.986, "step": 8192 }, { "epoch": 0.14976145648637285, "grad_norm": 7.663386127315567, "learning_rate": 9.62867927511322e-06, "loss": 17.935, "step": 8193 }, { "epoch": 0.14977973568281938, "grad_norm": 8.631183227611471, "learning_rate": 9.628567324092259e-06, "loss": 18.2473, "step": 8194 }, { "epoch": 0.14979801487926592, "grad_norm": 7.283198318557219, "learning_rate": 9.62845535684857e-06, "loss": 17.8827, "step": 8195 }, { "epoch": 0.14981629407571243, "grad_norm": 6.898003121324752, "learning_rate": 9.628343373382545e-06, "loss": 17.7923, "step": 8196 }, { "epoch": 0.14983457327215896, "grad_norm": 7.205962008746917, "learning_rate": 9.628231373694579e-06, "loss": 17.7067, "step": 8197 }, { "epoch": 0.14985285246860547, "grad_norm": 6.369033875223682, "learning_rate": 9.628119357785064e-06, "loss": 17.4865, "step": 8198 }, { "epoch": 0.149871131665052, "grad_norm": 7.086959868206967, "learning_rate": 9.628007325654392e-06, "loss": 17.7848, "step": 8199 }, { "epoch": 0.14988941086149854, "grad_norm": 7.554476693798808, "learning_rate": 9.627895277302957e-06, "loss": 17.9422, "step": 8200 }, { "epoch": 0.14990769005794505, "grad_norm": 7.975346708415603, "learning_rate": 9.62778321273115e-06, "loss": 18.1469, "step": 8201 }, { "epoch": 0.14992596925439158, "grad_norm": 7.016908788442051, "learning_rate": 9.627671131939363e-06, "loss": 17.8922, "step": 8202 }, { "epoch": 0.1499442484508381, "grad_norm": 8.85927995340893, "learning_rate": 9.627559034927992e-06, "loss": 18.2604, "step": 8203 }, { "epoch": 0.14996252764728463, "grad_norm": 6.774773064259678, "learning_rate": 9.627446921697427e-06, "loss": 17.3408, "step": 8204 }, { "epoch": 0.14998080684373116, "grad_norm": 6.294227610418616, "learning_rate": 9.627334792248064e-06, "loss": 17.2782, "step": 8205 }, { "epoch": 0.14999908604017767, "grad_norm": 7.369330199875309, "learning_rate": 9.627222646580291e-06, "loss": 17.9668, "step": 8206 }, { "epoch": 0.1500173652366242, "grad_norm": 8.27917234719453, "learning_rate": 9.627110484694506e-06, "loss": 18.1977, "step": 8207 }, { "epoch": 0.15003564443307071, "grad_norm": 7.8009489813351856, "learning_rate": 9.626998306591101e-06, "loss": 17.868, "step": 8208 }, { "epoch": 0.15005392362951725, "grad_norm": 7.77946634764763, "learning_rate": 9.626886112270467e-06, "loss": 17.7802, "step": 8209 }, { "epoch": 0.15007220282596376, "grad_norm": 6.884261644919327, "learning_rate": 9.626773901733e-06, "loss": 17.7854, "step": 8210 }, { "epoch": 0.1500904820224103, "grad_norm": 8.006721368370812, "learning_rate": 9.62666167497909e-06, "loss": 17.9747, "step": 8211 }, { "epoch": 0.15010876121885683, "grad_norm": 5.848516308608391, "learning_rate": 9.626549432009135e-06, "loss": 17.2307, "step": 8212 }, { "epoch": 0.15012704041530334, "grad_norm": 9.007683572794024, "learning_rate": 9.626437172823523e-06, "loss": 18.6756, "step": 8213 }, { "epoch": 0.15014531961174987, "grad_norm": 6.84217977983457, "learning_rate": 9.626324897422651e-06, "loss": 17.8017, "step": 8214 }, { "epoch": 0.15016359880819638, "grad_norm": 6.124497949168756, "learning_rate": 9.626212605806914e-06, "loss": 17.3608, "step": 8215 }, { "epoch": 0.15018187800464292, "grad_norm": 6.226158237857329, "learning_rate": 9.626100297976702e-06, "loss": 17.1185, "step": 8216 }, { "epoch": 0.15020015720108945, "grad_norm": 7.583217676650587, "learning_rate": 9.62598797393241e-06, "loss": 17.8746, "step": 8217 }, { "epoch": 0.15021843639753596, "grad_norm": 7.05815119157305, "learning_rate": 9.625875633674428e-06, "loss": 17.5794, "step": 8218 }, { "epoch": 0.1502367155939825, "grad_norm": 6.131312477463043, "learning_rate": 9.625763277203157e-06, "loss": 17.2176, "step": 8219 }, { "epoch": 0.150254994790429, "grad_norm": 9.33147680707214, "learning_rate": 9.625650904518986e-06, "loss": 18.486, "step": 8220 }, { "epoch": 0.15027327398687554, "grad_norm": 7.028451742104949, "learning_rate": 9.625538515622311e-06, "loss": 17.3888, "step": 8221 }, { "epoch": 0.15029155318332207, "grad_norm": 6.105778569504314, "learning_rate": 9.625426110513524e-06, "loss": 17.4368, "step": 8222 }, { "epoch": 0.15030983237976858, "grad_norm": 6.231559648553829, "learning_rate": 9.625313689193021e-06, "loss": 17.5064, "step": 8223 }, { "epoch": 0.15032811157621512, "grad_norm": 6.6105422614414096, "learning_rate": 9.625201251661193e-06, "loss": 17.4768, "step": 8224 }, { "epoch": 0.15034639077266163, "grad_norm": 7.787476106469747, "learning_rate": 9.625088797918437e-06, "loss": 18.2933, "step": 8225 }, { "epoch": 0.15036466996910816, "grad_norm": 6.97417096298724, "learning_rate": 9.624976327965146e-06, "loss": 17.5706, "step": 8226 }, { "epoch": 0.15038294916555467, "grad_norm": 5.758546880269337, "learning_rate": 9.624863841801715e-06, "loss": 17.1828, "step": 8227 }, { "epoch": 0.1504012283620012, "grad_norm": 6.765842795066055, "learning_rate": 9.624751339428537e-06, "loss": 17.3873, "step": 8228 }, { "epoch": 0.15041950755844774, "grad_norm": 6.8451688463139675, "learning_rate": 9.624638820846005e-06, "loss": 17.8134, "step": 8229 }, { "epoch": 0.15043778675489425, "grad_norm": 7.433415462032307, "learning_rate": 9.624526286054519e-06, "loss": 17.9911, "step": 8230 }, { "epoch": 0.15045606595134078, "grad_norm": 6.736119752188272, "learning_rate": 9.624413735054468e-06, "loss": 17.5756, "step": 8231 }, { "epoch": 0.1504743451477873, "grad_norm": 6.432736461057537, "learning_rate": 9.624301167846246e-06, "loss": 17.4461, "step": 8232 }, { "epoch": 0.15049262434423383, "grad_norm": 6.918900635903595, "learning_rate": 9.624188584430252e-06, "loss": 17.4558, "step": 8233 }, { "epoch": 0.15051090354068036, "grad_norm": 7.75983457150723, "learning_rate": 9.624075984806878e-06, "loss": 17.8908, "step": 8234 }, { "epoch": 0.15052918273712687, "grad_norm": 8.282034442417402, "learning_rate": 9.623963368976519e-06, "loss": 18.27, "step": 8235 }, { "epoch": 0.1505474619335734, "grad_norm": 7.158220967820405, "learning_rate": 9.623850736939568e-06, "loss": 17.8442, "step": 8236 }, { "epoch": 0.15056574113001991, "grad_norm": 7.071703795375992, "learning_rate": 9.623738088696425e-06, "loss": 17.6172, "step": 8237 }, { "epoch": 0.15058402032646645, "grad_norm": 6.619604300037013, "learning_rate": 9.623625424247479e-06, "loss": 17.538, "step": 8238 }, { "epoch": 0.15060229952291299, "grad_norm": 8.159289456968516, "learning_rate": 9.623512743593126e-06, "loss": 18.3043, "step": 8239 }, { "epoch": 0.1506205787193595, "grad_norm": 7.891801653755344, "learning_rate": 9.623400046733762e-06, "loss": 18.214, "step": 8240 }, { "epoch": 0.15063885791580603, "grad_norm": 7.315861513886885, "learning_rate": 9.623287333669784e-06, "loss": 17.6339, "step": 8241 }, { "epoch": 0.15065713711225254, "grad_norm": 6.842762964775592, "learning_rate": 9.623174604401584e-06, "loss": 17.8821, "step": 8242 }, { "epoch": 0.15067541630869907, "grad_norm": 7.45071955468202, "learning_rate": 9.623061858929558e-06, "loss": 17.8013, "step": 8243 }, { "epoch": 0.15069369550514558, "grad_norm": 7.278205613368842, "learning_rate": 9.622949097254103e-06, "loss": 17.5657, "step": 8244 }, { "epoch": 0.15071197470159212, "grad_norm": 6.689719787695791, "learning_rate": 9.62283631937561e-06, "loss": 17.3731, "step": 8245 }, { "epoch": 0.15073025389803865, "grad_norm": 5.89983201704077, "learning_rate": 9.62272352529448e-06, "loss": 17.1696, "step": 8246 }, { "epoch": 0.15074853309448516, "grad_norm": 7.432211932522837, "learning_rate": 9.622610715011103e-06, "loss": 17.8963, "step": 8247 }, { "epoch": 0.1507668122909317, "grad_norm": 7.287658201953378, "learning_rate": 9.622497888525878e-06, "loss": 18.0643, "step": 8248 }, { "epoch": 0.1507850914873782, "grad_norm": 7.3204055710807, "learning_rate": 9.622385045839197e-06, "loss": 17.8771, "step": 8249 }, { "epoch": 0.15080337068382474, "grad_norm": 7.268925788270548, "learning_rate": 9.622272186951458e-06, "loss": 18.2706, "step": 8250 }, { "epoch": 0.15082164988027127, "grad_norm": 7.474373604212861, "learning_rate": 9.622159311863057e-06, "loss": 18.1105, "step": 8251 }, { "epoch": 0.15083992907671778, "grad_norm": 6.301086570045507, "learning_rate": 9.622046420574389e-06, "loss": 17.472, "step": 8252 }, { "epoch": 0.15085820827316432, "grad_norm": 6.698138661279464, "learning_rate": 9.621933513085848e-06, "loss": 17.7003, "step": 8253 }, { "epoch": 0.15087648746961083, "grad_norm": 7.081841556616977, "learning_rate": 9.621820589397832e-06, "loss": 17.6653, "step": 8254 }, { "epoch": 0.15089476666605736, "grad_norm": 7.3238418748277745, "learning_rate": 9.621707649510736e-06, "loss": 17.9191, "step": 8255 }, { "epoch": 0.1509130458625039, "grad_norm": 6.742651680720778, "learning_rate": 9.621594693424955e-06, "loss": 17.6599, "step": 8256 }, { "epoch": 0.1509313250589504, "grad_norm": 7.58101079672959, "learning_rate": 9.621481721140885e-06, "loss": 18.2079, "step": 8257 }, { "epoch": 0.15094960425539694, "grad_norm": 6.863753551861713, "learning_rate": 9.621368732658925e-06, "loss": 17.8148, "step": 8258 }, { "epoch": 0.15096788345184345, "grad_norm": 6.47264056577828, "learning_rate": 9.621255727979467e-06, "loss": 17.4016, "step": 8259 }, { "epoch": 0.15098616264828998, "grad_norm": 7.603533666439708, "learning_rate": 9.621142707102908e-06, "loss": 18.2694, "step": 8260 }, { "epoch": 0.1510044418447365, "grad_norm": 6.540029090559662, "learning_rate": 9.621029670029647e-06, "loss": 17.6542, "step": 8261 }, { "epoch": 0.15102272104118303, "grad_norm": 7.316452676273734, "learning_rate": 9.620916616760076e-06, "loss": 17.7939, "step": 8262 }, { "epoch": 0.15104100023762956, "grad_norm": 7.181725949839706, "learning_rate": 9.620803547294595e-06, "loss": 17.8618, "step": 8263 }, { "epoch": 0.15105927943407607, "grad_norm": 8.046996352147591, "learning_rate": 9.620690461633597e-06, "loss": 18.103, "step": 8264 }, { "epoch": 0.1510775586305226, "grad_norm": 6.452773693713817, "learning_rate": 9.620577359777481e-06, "loss": 17.5604, "step": 8265 }, { "epoch": 0.15109583782696911, "grad_norm": 6.74310278963078, "learning_rate": 9.62046424172664e-06, "loss": 17.6312, "step": 8266 }, { "epoch": 0.15111411702341565, "grad_norm": 8.863382977064171, "learning_rate": 9.620351107481476e-06, "loss": 18.6413, "step": 8267 }, { "epoch": 0.1511323962198622, "grad_norm": 6.426130488184393, "learning_rate": 9.620237957042382e-06, "loss": 17.4138, "step": 8268 }, { "epoch": 0.1511506754163087, "grad_norm": 7.165066842644639, "learning_rate": 9.620124790409752e-06, "loss": 17.4794, "step": 8269 }, { "epoch": 0.15116895461275523, "grad_norm": 5.783885020588841, "learning_rate": 9.620011607583988e-06, "loss": 17.186, "step": 8270 }, { "epoch": 0.15118723380920174, "grad_norm": 6.653501274399079, "learning_rate": 9.619898408565485e-06, "loss": 17.8724, "step": 8271 }, { "epoch": 0.15120551300564827, "grad_norm": 9.47444081173225, "learning_rate": 9.619785193354636e-06, "loss": 17.9671, "step": 8272 }, { "epoch": 0.1512237922020948, "grad_norm": 8.314644938169216, "learning_rate": 9.619671961951843e-06, "loss": 18.4144, "step": 8273 }, { "epoch": 0.15124207139854132, "grad_norm": 7.804214639424101, "learning_rate": 9.6195587143575e-06, "loss": 17.815, "step": 8274 }, { "epoch": 0.15126035059498785, "grad_norm": 6.494363859522821, "learning_rate": 9.619445450572005e-06, "loss": 17.7352, "step": 8275 }, { "epoch": 0.15127862979143436, "grad_norm": 6.75121851459919, "learning_rate": 9.619332170595753e-06, "loss": 17.5873, "step": 8276 }, { "epoch": 0.1512969089878809, "grad_norm": 8.221373152111685, "learning_rate": 9.619218874429144e-06, "loss": 18.549, "step": 8277 }, { "epoch": 0.1513151881843274, "grad_norm": 7.065653786126481, "learning_rate": 9.619105562072573e-06, "loss": 17.5719, "step": 8278 }, { "epoch": 0.15133346738077394, "grad_norm": 8.495427470673599, "learning_rate": 9.618992233526438e-06, "loss": 18.1892, "step": 8279 }, { "epoch": 0.15135174657722048, "grad_norm": 6.594544755034357, "learning_rate": 9.618878888791136e-06, "loss": 17.3768, "step": 8280 }, { "epoch": 0.15137002577366698, "grad_norm": 7.174457454636815, "learning_rate": 9.618765527867065e-06, "loss": 17.9663, "step": 8281 }, { "epoch": 0.15138830497011352, "grad_norm": 6.486051817146391, "learning_rate": 9.618652150754621e-06, "loss": 17.5412, "step": 8282 }, { "epoch": 0.15140658416656003, "grad_norm": 6.660156852894946, "learning_rate": 9.618538757454202e-06, "loss": 17.6116, "step": 8283 }, { "epoch": 0.15142486336300656, "grad_norm": 6.652304984094611, "learning_rate": 9.618425347966206e-06, "loss": 17.7333, "step": 8284 }, { "epoch": 0.1514431425594531, "grad_norm": 9.697218587626619, "learning_rate": 9.61831192229103e-06, "loss": 18.755, "step": 8285 }, { "epoch": 0.1514614217558996, "grad_norm": 5.983378811259161, "learning_rate": 9.618198480429071e-06, "loss": 17.2025, "step": 8286 }, { "epoch": 0.15147970095234614, "grad_norm": 6.9403136869832, "learning_rate": 9.618085022380727e-06, "loss": 17.7489, "step": 8287 }, { "epoch": 0.15149798014879265, "grad_norm": 6.289242074092284, "learning_rate": 9.617971548146395e-06, "loss": 17.511, "step": 8288 }, { "epoch": 0.15151625934523918, "grad_norm": 10.072604609154403, "learning_rate": 9.617858057726474e-06, "loss": 17.6072, "step": 8289 }, { "epoch": 0.15153453854168572, "grad_norm": 7.738213544209298, "learning_rate": 9.617744551121362e-06, "loss": 18.0687, "step": 8290 }, { "epoch": 0.15155281773813223, "grad_norm": 8.88955160968627, "learning_rate": 9.617631028331455e-06, "loss": 18.2794, "step": 8291 }, { "epoch": 0.15157109693457876, "grad_norm": 7.129723897496373, "learning_rate": 9.617517489357153e-06, "loss": 18.0201, "step": 8292 }, { "epoch": 0.15158937613102527, "grad_norm": 6.187657092673538, "learning_rate": 9.617403934198852e-06, "loss": 17.2293, "step": 8293 }, { "epoch": 0.1516076553274718, "grad_norm": 7.77523702408604, "learning_rate": 9.61729036285695e-06, "loss": 18.4022, "step": 8294 }, { "epoch": 0.15162593452391832, "grad_norm": 7.051666309443853, "learning_rate": 9.617176775331848e-06, "loss": 18.0027, "step": 8295 }, { "epoch": 0.15164421372036485, "grad_norm": 7.648719821643449, "learning_rate": 9.61706317162394e-06, "loss": 17.4062, "step": 8296 }, { "epoch": 0.1516624929168114, "grad_norm": 6.553220877152369, "learning_rate": 9.61694955173363e-06, "loss": 17.4329, "step": 8297 }, { "epoch": 0.1516807721132579, "grad_norm": 7.048283344257818, "learning_rate": 9.616835915661308e-06, "loss": 17.8347, "step": 8298 }, { "epoch": 0.15169905130970443, "grad_norm": 8.622176775355504, "learning_rate": 9.616722263407381e-06, "loss": 18.3363, "step": 8299 }, { "epoch": 0.15171733050615094, "grad_norm": 7.3725743209216645, "learning_rate": 9.61660859497224e-06, "loss": 17.8952, "step": 8300 }, { "epoch": 0.15173560970259747, "grad_norm": 7.667911868647525, "learning_rate": 9.616494910356287e-06, "loss": 18.1777, "step": 8301 }, { "epoch": 0.151753888899044, "grad_norm": 5.6619680493594675, "learning_rate": 9.61638120955992e-06, "loss": 17.278, "step": 8302 }, { "epoch": 0.15177216809549052, "grad_norm": 7.518921455476554, "learning_rate": 9.616267492583538e-06, "loss": 17.9855, "step": 8303 }, { "epoch": 0.15179044729193705, "grad_norm": 7.362345100622486, "learning_rate": 9.616153759427539e-06, "loss": 17.8035, "step": 8304 }, { "epoch": 0.15180872648838356, "grad_norm": 6.771119541757221, "learning_rate": 9.616040010092322e-06, "loss": 17.447, "step": 8305 }, { "epoch": 0.1518270056848301, "grad_norm": 11.090780843549402, "learning_rate": 9.615926244578283e-06, "loss": 19.3201, "step": 8306 }, { "epoch": 0.15184528488127663, "grad_norm": 7.69397219050792, "learning_rate": 9.615812462885825e-06, "loss": 18.143, "step": 8307 }, { "epoch": 0.15186356407772314, "grad_norm": 6.581930909105399, "learning_rate": 9.615698665015345e-06, "loss": 17.4815, "step": 8308 }, { "epoch": 0.15188184327416968, "grad_norm": 7.343480040899406, "learning_rate": 9.615584850967242e-06, "loss": 18.1581, "step": 8309 }, { "epoch": 0.15190012247061618, "grad_norm": 7.281686300188413, "learning_rate": 9.615471020741913e-06, "loss": 17.8876, "step": 8310 }, { "epoch": 0.15191840166706272, "grad_norm": 6.023210772722194, "learning_rate": 9.615357174339759e-06, "loss": 17.1831, "step": 8311 }, { "epoch": 0.15193668086350923, "grad_norm": 8.554098411455712, "learning_rate": 9.61524331176118e-06, "loss": 18.1684, "step": 8312 }, { "epoch": 0.15195496005995576, "grad_norm": 6.123535324372592, "learning_rate": 9.615129433006573e-06, "loss": 17.3887, "step": 8313 }, { "epoch": 0.1519732392564023, "grad_norm": 5.73365668953176, "learning_rate": 9.615015538076338e-06, "loss": 16.9956, "step": 8314 }, { "epoch": 0.1519915184528488, "grad_norm": 7.799627749598492, "learning_rate": 9.614901626970873e-06, "loss": 18.2006, "step": 8315 }, { "epoch": 0.15200979764929534, "grad_norm": 6.543433915866202, "learning_rate": 9.61478769969058e-06, "loss": 17.4647, "step": 8316 }, { "epoch": 0.15202807684574185, "grad_norm": 8.78596846594541, "learning_rate": 9.614673756235854e-06, "loss": 18.5613, "step": 8317 }, { "epoch": 0.15204635604218839, "grad_norm": 7.318576023539961, "learning_rate": 9.6145597966071e-06, "loss": 17.8715, "step": 8318 }, { "epoch": 0.15206463523863492, "grad_norm": 7.380504903544179, "learning_rate": 9.614445820804711e-06, "loss": 17.8962, "step": 8319 }, { "epoch": 0.15208291443508143, "grad_norm": 7.335299172284095, "learning_rate": 9.614331828829091e-06, "loss": 18.0066, "step": 8320 }, { "epoch": 0.15210119363152796, "grad_norm": 6.601506403760204, "learning_rate": 9.614217820680641e-06, "loss": 17.2911, "step": 8321 }, { "epoch": 0.15211947282797447, "grad_norm": 8.246965261381105, "learning_rate": 9.614103796359755e-06, "loss": 18.3982, "step": 8322 }, { "epoch": 0.152137752024421, "grad_norm": 5.151529787885932, "learning_rate": 9.613989755866835e-06, "loss": 16.8491, "step": 8323 }, { "epoch": 0.15215603122086754, "grad_norm": 11.081636020111297, "learning_rate": 9.613875699202284e-06, "loss": 18.446, "step": 8324 }, { "epoch": 0.15217431041731405, "grad_norm": 6.756162709600048, "learning_rate": 9.613761626366498e-06, "loss": 17.7069, "step": 8325 }, { "epoch": 0.1521925896137606, "grad_norm": 6.15423922061458, "learning_rate": 9.613647537359878e-06, "loss": 17.3873, "step": 8326 }, { "epoch": 0.1522108688102071, "grad_norm": 6.603928084287123, "learning_rate": 9.613533432182822e-06, "loss": 17.5039, "step": 8327 }, { "epoch": 0.15222914800665363, "grad_norm": 5.958683100556394, "learning_rate": 9.613419310835734e-06, "loss": 17.2975, "step": 8328 }, { "epoch": 0.15224742720310014, "grad_norm": 7.536025657207551, "learning_rate": 9.61330517331901e-06, "loss": 17.6498, "step": 8329 }, { "epoch": 0.15226570639954667, "grad_norm": 7.580601309602559, "learning_rate": 9.613191019633053e-06, "loss": 18.1946, "step": 8330 }, { "epoch": 0.1522839855959932, "grad_norm": 6.5379268220550575, "learning_rate": 9.61307684977826e-06, "loss": 17.5752, "step": 8331 }, { "epoch": 0.15230226479243972, "grad_norm": 7.891329417755743, "learning_rate": 9.612962663755035e-06, "loss": 18.1304, "step": 8332 }, { "epoch": 0.15232054398888625, "grad_norm": 5.96099476374749, "learning_rate": 9.612848461563776e-06, "loss": 17.3644, "step": 8333 }, { "epoch": 0.15233882318533276, "grad_norm": 7.558239477538888, "learning_rate": 9.612734243204882e-06, "loss": 18.055, "step": 8334 }, { "epoch": 0.1523571023817793, "grad_norm": 7.722599467178709, "learning_rate": 9.612620008678755e-06, "loss": 17.9425, "step": 8335 }, { "epoch": 0.15237538157822583, "grad_norm": 7.270872958368524, "learning_rate": 9.612505757985795e-06, "loss": 17.782, "step": 8336 }, { "epoch": 0.15239366077467234, "grad_norm": 7.504788950693975, "learning_rate": 9.612391491126403e-06, "loss": 18.0103, "step": 8337 }, { "epoch": 0.15241193997111888, "grad_norm": 7.005902932978128, "learning_rate": 9.612277208100979e-06, "loss": 17.8027, "step": 8338 }, { "epoch": 0.15243021916756538, "grad_norm": 8.191436162031327, "learning_rate": 9.612162908909924e-06, "loss": 18.4753, "step": 8339 }, { "epoch": 0.15244849836401192, "grad_norm": 7.765269070423985, "learning_rate": 9.612048593553639e-06, "loss": 18.4064, "step": 8340 }, { "epoch": 0.15246677756045846, "grad_norm": 6.722960780984315, "learning_rate": 9.611934262032522e-06, "loss": 17.2695, "step": 8341 }, { "epoch": 0.15248505675690496, "grad_norm": 7.270974839599677, "learning_rate": 9.611819914346978e-06, "loss": 17.8775, "step": 8342 }, { "epoch": 0.1525033359533515, "grad_norm": 6.80859111730569, "learning_rate": 9.611705550497404e-06, "loss": 17.4734, "step": 8343 }, { "epoch": 0.152521615149798, "grad_norm": 6.633348536040492, "learning_rate": 9.611591170484202e-06, "loss": 17.5199, "step": 8344 }, { "epoch": 0.15253989434624454, "grad_norm": 6.301785897670512, "learning_rate": 9.611476774307773e-06, "loss": 17.3887, "step": 8345 }, { "epoch": 0.15255817354269105, "grad_norm": 7.8771904174235035, "learning_rate": 9.611362361968519e-06, "loss": 18.2089, "step": 8346 }, { "epoch": 0.15257645273913759, "grad_norm": 6.054082585091909, "learning_rate": 9.611247933466838e-06, "loss": 17.1358, "step": 8347 }, { "epoch": 0.15259473193558412, "grad_norm": 6.025642937983709, "learning_rate": 9.611133488803134e-06, "loss": 17.3968, "step": 8348 }, { "epoch": 0.15261301113203063, "grad_norm": 6.454400638632909, "learning_rate": 9.611019027977809e-06, "loss": 17.4408, "step": 8349 }, { "epoch": 0.15263129032847716, "grad_norm": 8.576715940344029, "learning_rate": 9.610904550991262e-06, "loss": 18.3775, "step": 8350 }, { "epoch": 0.15264956952492367, "grad_norm": 6.125045802018629, "learning_rate": 9.610790057843892e-06, "loss": 17.254, "step": 8351 }, { "epoch": 0.1526678487213702, "grad_norm": 6.4307968288897275, "learning_rate": 9.610675548536107e-06, "loss": 17.4323, "step": 8352 }, { "epoch": 0.15268612791781674, "grad_norm": 7.637818889551061, "learning_rate": 9.610561023068301e-06, "loss": 17.7409, "step": 8353 }, { "epoch": 0.15270440711426325, "grad_norm": 8.152296239217721, "learning_rate": 9.61044648144088e-06, "loss": 17.9951, "step": 8354 }, { "epoch": 0.1527226863107098, "grad_norm": 7.319634671798852, "learning_rate": 9.610331923654243e-06, "loss": 18.2639, "step": 8355 }, { "epoch": 0.1527409655071563, "grad_norm": 7.4942349441715175, "learning_rate": 9.610217349708796e-06, "loss": 18.0278, "step": 8356 }, { "epoch": 0.15275924470360283, "grad_norm": 8.099602138005944, "learning_rate": 9.610102759604934e-06, "loss": 18.1418, "step": 8357 }, { "epoch": 0.15277752390004937, "grad_norm": 6.82012127276604, "learning_rate": 9.609988153343064e-06, "loss": 17.4784, "step": 8358 }, { "epoch": 0.15279580309649587, "grad_norm": 7.054166587710082, "learning_rate": 9.609873530923584e-06, "loss": 17.6846, "step": 8359 }, { "epoch": 0.1528140822929424, "grad_norm": 7.558182667582104, "learning_rate": 9.609758892346897e-06, "loss": 17.7426, "step": 8360 }, { "epoch": 0.15283236148938892, "grad_norm": 8.073310691773505, "learning_rate": 9.609644237613407e-06, "loss": 17.7529, "step": 8361 }, { "epoch": 0.15285064068583545, "grad_norm": 7.4418851112152185, "learning_rate": 9.609529566723512e-06, "loss": 18.0476, "step": 8362 }, { "epoch": 0.15286891988228196, "grad_norm": 6.894493856924612, "learning_rate": 9.609414879677617e-06, "loss": 17.5686, "step": 8363 }, { "epoch": 0.1528871990787285, "grad_norm": 7.459148446865004, "learning_rate": 9.609300176476123e-06, "loss": 18.014, "step": 8364 }, { "epoch": 0.15290547827517503, "grad_norm": 6.4473080737450115, "learning_rate": 9.60918545711943e-06, "loss": 17.5612, "step": 8365 }, { "epoch": 0.15292375747162154, "grad_norm": 5.680097380401487, "learning_rate": 9.609070721607943e-06, "loss": 17.1274, "step": 8366 }, { "epoch": 0.15294203666806808, "grad_norm": 6.237437755248435, "learning_rate": 9.608955969942064e-06, "loss": 17.4624, "step": 8367 }, { "epoch": 0.15296031586451458, "grad_norm": 8.264595237228436, "learning_rate": 9.608841202122193e-06, "loss": 18.344, "step": 8368 }, { "epoch": 0.15297859506096112, "grad_norm": 8.77713457863881, "learning_rate": 9.608726418148736e-06, "loss": 17.7051, "step": 8369 }, { "epoch": 0.15299687425740766, "grad_norm": 5.972412339171528, "learning_rate": 9.60861161802209e-06, "loss": 17.4506, "step": 8370 }, { "epoch": 0.15301515345385416, "grad_norm": 7.518788273320577, "learning_rate": 9.60849680174266e-06, "loss": 17.8397, "step": 8371 }, { "epoch": 0.1530334326503007, "grad_norm": 6.5896332317018285, "learning_rate": 9.608381969310851e-06, "loss": 17.5284, "step": 8372 }, { "epoch": 0.1530517118467472, "grad_norm": 7.233193816201468, "learning_rate": 9.608267120727061e-06, "loss": 17.8026, "step": 8373 }, { "epoch": 0.15306999104319374, "grad_norm": 7.96773633243533, "learning_rate": 9.608152255991696e-06, "loss": 17.6385, "step": 8374 }, { "epoch": 0.15308827023964028, "grad_norm": 6.059440494427977, "learning_rate": 9.608037375105157e-06, "loss": 17.675, "step": 8375 }, { "epoch": 0.1531065494360868, "grad_norm": 7.290037755876943, "learning_rate": 9.607922478067845e-06, "loss": 18.0835, "step": 8376 }, { "epoch": 0.15312482863253332, "grad_norm": 7.756126214542827, "learning_rate": 9.607807564880168e-06, "loss": 17.9592, "step": 8377 }, { "epoch": 0.15314310782897983, "grad_norm": 8.972905092985298, "learning_rate": 9.607692635542523e-06, "loss": 18.5581, "step": 8378 }, { "epoch": 0.15316138702542637, "grad_norm": 8.293569395540622, "learning_rate": 9.607577690055316e-06, "loss": 18.278, "step": 8379 }, { "epoch": 0.15317966622187287, "grad_norm": 7.054632744946273, "learning_rate": 9.607462728418948e-06, "loss": 18.0781, "step": 8380 }, { "epoch": 0.1531979454183194, "grad_norm": 7.178135813819243, "learning_rate": 9.607347750633824e-06, "loss": 17.4505, "step": 8381 }, { "epoch": 0.15321622461476594, "grad_norm": 6.747552815412986, "learning_rate": 9.607232756700345e-06, "loss": 17.7336, "step": 8382 }, { "epoch": 0.15323450381121245, "grad_norm": 6.898898806189686, "learning_rate": 9.607117746618916e-06, "loss": 17.9267, "step": 8383 }, { "epoch": 0.153252783007659, "grad_norm": 6.3177247531269884, "learning_rate": 9.607002720389938e-06, "loss": 17.5096, "step": 8384 }, { "epoch": 0.1532710622041055, "grad_norm": 7.065871314133091, "learning_rate": 9.606887678013817e-06, "loss": 17.6314, "step": 8385 }, { "epoch": 0.15328934140055203, "grad_norm": 6.845906887067965, "learning_rate": 9.606772619490952e-06, "loss": 17.5358, "step": 8386 }, { "epoch": 0.15330762059699857, "grad_norm": 5.547321384041577, "learning_rate": 9.60665754482175e-06, "loss": 17.0189, "step": 8387 }, { "epoch": 0.15332589979344508, "grad_norm": 7.777365897968196, "learning_rate": 9.606542454006614e-06, "loss": 17.3437, "step": 8388 }, { "epoch": 0.1533441789898916, "grad_norm": 6.35152422892199, "learning_rate": 9.606427347045945e-06, "loss": 17.3832, "step": 8389 }, { "epoch": 0.15336245818633812, "grad_norm": 7.921700262257623, "learning_rate": 9.606312223940149e-06, "loss": 18.2209, "step": 8390 }, { "epoch": 0.15338073738278465, "grad_norm": 8.568656423301833, "learning_rate": 9.606197084689628e-06, "loss": 17.256, "step": 8391 }, { "epoch": 0.1533990165792312, "grad_norm": 7.973003413749782, "learning_rate": 9.606081929294785e-06, "loss": 18.0763, "step": 8392 }, { "epoch": 0.1534172957756777, "grad_norm": 6.084094784940632, "learning_rate": 9.605966757756025e-06, "loss": 17.402, "step": 8393 }, { "epoch": 0.15343557497212423, "grad_norm": 7.196630568735634, "learning_rate": 9.605851570073751e-06, "loss": 17.8722, "step": 8394 }, { "epoch": 0.15345385416857074, "grad_norm": 7.565946134509732, "learning_rate": 9.605736366248368e-06, "loss": 17.703, "step": 8395 }, { "epoch": 0.15347213336501728, "grad_norm": 7.121086636966807, "learning_rate": 9.605621146280278e-06, "loss": 17.5359, "step": 8396 }, { "epoch": 0.15349041256146378, "grad_norm": 8.071905115767212, "learning_rate": 9.605505910169885e-06, "loss": 18.0466, "step": 8397 }, { "epoch": 0.15350869175791032, "grad_norm": 7.1637062157975135, "learning_rate": 9.605390657917594e-06, "loss": 17.9921, "step": 8398 }, { "epoch": 0.15352697095435686, "grad_norm": 6.79267118405408, "learning_rate": 9.605275389523809e-06, "loss": 17.437, "step": 8399 }, { "epoch": 0.15354525015080336, "grad_norm": 6.152885576362294, "learning_rate": 9.605160104988934e-06, "loss": 17.1758, "step": 8400 }, { "epoch": 0.1535635293472499, "grad_norm": 7.600485582114135, "learning_rate": 9.60504480431337e-06, "loss": 18.0655, "step": 8401 }, { "epoch": 0.1535818085436964, "grad_norm": 8.148246820658834, "learning_rate": 9.604929487497525e-06, "loss": 18.264, "step": 8402 }, { "epoch": 0.15360008774014294, "grad_norm": 7.703202854237343, "learning_rate": 9.604814154541801e-06, "loss": 18.1432, "step": 8403 }, { "epoch": 0.15361836693658948, "grad_norm": 6.731615825436619, "learning_rate": 9.604698805446604e-06, "loss": 17.361, "step": 8404 }, { "epoch": 0.153636646133036, "grad_norm": 7.287228303237335, "learning_rate": 9.604583440212338e-06, "loss": 17.8045, "step": 8405 }, { "epoch": 0.15365492532948252, "grad_norm": 6.349368019047275, "learning_rate": 9.604468058839405e-06, "loss": 17.2851, "step": 8406 }, { "epoch": 0.15367320452592903, "grad_norm": 6.740836440994955, "learning_rate": 9.604352661328212e-06, "loss": 17.4433, "step": 8407 }, { "epoch": 0.15369148372237557, "grad_norm": 6.536896433117541, "learning_rate": 9.604237247679162e-06, "loss": 17.553, "step": 8408 }, { "epoch": 0.1537097629188221, "grad_norm": 7.988312390636531, "learning_rate": 9.60412181789266e-06, "loss": 18.0865, "step": 8409 }, { "epoch": 0.1537280421152686, "grad_norm": 7.7923249945074025, "learning_rate": 9.604006371969111e-06, "loss": 17.7819, "step": 8410 }, { "epoch": 0.15374632131171515, "grad_norm": 7.338200333907157, "learning_rate": 9.603890909908917e-06, "loss": 17.9571, "step": 8411 }, { "epoch": 0.15376460050816165, "grad_norm": 7.761879206895382, "learning_rate": 9.603775431712487e-06, "loss": 17.6326, "step": 8412 }, { "epoch": 0.1537828797046082, "grad_norm": 6.48170202759222, "learning_rate": 9.603659937380223e-06, "loss": 17.4447, "step": 8413 }, { "epoch": 0.1538011589010547, "grad_norm": 7.836939441436887, "learning_rate": 9.60354442691253e-06, "loss": 17.8237, "step": 8414 }, { "epoch": 0.15381943809750123, "grad_norm": 6.719946252733968, "learning_rate": 9.603428900309815e-06, "loss": 17.6225, "step": 8415 }, { "epoch": 0.15383771729394777, "grad_norm": 7.542350517608685, "learning_rate": 9.60331335757248e-06, "loss": 17.9143, "step": 8416 }, { "epoch": 0.15385599649039428, "grad_norm": 6.557273227773028, "learning_rate": 9.60319779870093e-06, "loss": 17.6563, "step": 8417 }, { "epoch": 0.1538742756868408, "grad_norm": 7.687704941739925, "learning_rate": 9.603082223695572e-06, "loss": 17.8355, "step": 8418 }, { "epoch": 0.15389255488328732, "grad_norm": 5.9273300557097, "learning_rate": 9.602966632556812e-06, "loss": 17.266, "step": 8419 }, { "epoch": 0.15391083407973385, "grad_norm": 7.122823065006941, "learning_rate": 9.602851025285052e-06, "loss": 17.7552, "step": 8420 }, { "epoch": 0.1539291132761804, "grad_norm": 6.624075396223145, "learning_rate": 9.602735401880699e-06, "loss": 17.7015, "step": 8421 }, { "epoch": 0.1539473924726269, "grad_norm": 6.480000062232727, "learning_rate": 9.602619762344156e-06, "loss": 17.4433, "step": 8422 }, { "epoch": 0.15396567166907343, "grad_norm": 7.189402278528076, "learning_rate": 9.602504106675832e-06, "loss": 17.7305, "step": 8423 }, { "epoch": 0.15398395086551994, "grad_norm": 8.228315896538703, "learning_rate": 9.60238843487613e-06, "loss": 18.2283, "step": 8424 }, { "epoch": 0.15400223006196648, "grad_norm": 6.209607023376287, "learning_rate": 9.602272746945455e-06, "loss": 17.4764, "step": 8425 }, { "epoch": 0.154020509258413, "grad_norm": 7.474945940669149, "learning_rate": 9.602157042884214e-06, "loss": 18.2579, "step": 8426 }, { "epoch": 0.15403878845485952, "grad_norm": 8.210926673852006, "learning_rate": 9.602041322692811e-06, "loss": 17.5434, "step": 8427 }, { "epoch": 0.15405706765130606, "grad_norm": 6.769935715137518, "learning_rate": 9.601925586371655e-06, "loss": 17.7453, "step": 8428 }, { "epoch": 0.15407534684775256, "grad_norm": 6.101879424645034, "learning_rate": 9.601809833921148e-06, "loss": 17.1897, "step": 8429 }, { "epoch": 0.1540936260441991, "grad_norm": 7.916061720624111, "learning_rate": 9.601694065341697e-06, "loss": 18.0381, "step": 8430 }, { "epoch": 0.1541119052406456, "grad_norm": 8.024275521095763, "learning_rate": 9.601578280633707e-06, "loss": 18.1038, "step": 8431 }, { "epoch": 0.15413018443709214, "grad_norm": 6.076529789367406, "learning_rate": 9.601462479797585e-06, "loss": 17.3861, "step": 8432 }, { "epoch": 0.15414846363353868, "grad_norm": 6.127567418286736, "learning_rate": 9.601346662833735e-06, "loss": 17.5513, "step": 8433 }, { "epoch": 0.1541667428299852, "grad_norm": 7.639056821110453, "learning_rate": 9.601230829742566e-06, "loss": 17.6508, "step": 8434 }, { "epoch": 0.15418502202643172, "grad_norm": 5.856297758944837, "learning_rate": 9.601114980524481e-06, "loss": 17.1349, "step": 8435 }, { "epoch": 0.15420330122287823, "grad_norm": 7.046360103050307, "learning_rate": 9.600999115179888e-06, "loss": 17.7374, "step": 8436 }, { "epoch": 0.15422158041932477, "grad_norm": 8.808883027907436, "learning_rate": 9.600883233709192e-06, "loss": 18.8813, "step": 8437 }, { "epoch": 0.1542398596157713, "grad_norm": 6.65821248202838, "learning_rate": 9.6007673361128e-06, "loss": 17.6938, "step": 8438 }, { "epoch": 0.1542581388122178, "grad_norm": 8.040611572705162, "learning_rate": 9.600651422391116e-06, "loss": 17.9305, "step": 8439 }, { "epoch": 0.15427641800866435, "grad_norm": 6.922120606496327, "learning_rate": 9.600535492544551e-06, "loss": 17.6077, "step": 8440 }, { "epoch": 0.15429469720511085, "grad_norm": 6.183653454314233, "learning_rate": 9.600419546573506e-06, "loss": 17.3557, "step": 8441 }, { "epoch": 0.1543129764015574, "grad_norm": 7.595367575093661, "learning_rate": 9.60030358447839e-06, "loss": 18.0367, "step": 8442 }, { "epoch": 0.15433125559800392, "grad_norm": 5.957698249849723, "learning_rate": 9.60018760625961e-06, "loss": 17.3916, "step": 8443 }, { "epoch": 0.15434953479445043, "grad_norm": 7.12068244520129, "learning_rate": 9.60007161191757e-06, "loss": 17.7851, "step": 8444 }, { "epoch": 0.15436781399089697, "grad_norm": 6.939791736338509, "learning_rate": 9.59995560145268e-06, "loss": 17.8164, "step": 8445 }, { "epoch": 0.15438609318734348, "grad_norm": 6.039847122761099, "learning_rate": 9.599839574865345e-06, "loss": 17.2839, "step": 8446 }, { "epoch": 0.15440437238379, "grad_norm": 7.360061800195574, "learning_rate": 9.59972353215597e-06, "loss": 17.5683, "step": 8447 }, { "epoch": 0.15442265158023652, "grad_norm": 7.856934463667765, "learning_rate": 9.599607473324963e-06, "loss": 17.6077, "step": 8448 }, { "epoch": 0.15444093077668306, "grad_norm": 5.795155529479495, "learning_rate": 9.599491398372731e-06, "loss": 17.2855, "step": 8449 }, { "epoch": 0.1544592099731296, "grad_norm": 6.385509459357291, "learning_rate": 9.599375307299682e-06, "loss": 17.6995, "step": 8450 }, { "epoch": 0.1544774891695761, "grad_norm": 7.730295345935798, "learning_rate": 9.59925920010622e-06, "loss": 17.8271, "step": 8451 }, { "epoch": 0.15449576836602263, "grad_norm": 6.656709036539596, "learning_rate": 9.599143076792756e-06, "loss": 17.5784, "step": 8452 }, { "epoch": 0.15451404756246914, "grad_norm": 6.494362497958026, "learning_rate": 9.599026937359694e-06, "loss": 17.5043, "step": 8453 }, { "epoch": 0.15453232675891568, "grad_norm": 7.926585516267862, "learning_rate": 9.59891078180744e-06, "loss": 17.8609, "step": 8454 }, { "epoch": 0.1545506059553622, "grad_norm": 7.2283692594229745, "learning_rate": 9.598794610136405e-06, "loss": 17.9943, "step": 8455 }, { "epoch": 0.15456888515180872, "grad_norm": 8.839924010422484, "learning_rate": 9.598678422346992e-06, "loss": 18.5454, "step": 8456 }, { "epoch": 0.15458716434825526, "grad_norm": 6.560765416062664, "learning_rate": 9.598562218439612e-06, "loss": 17.6048, "step": 8457 }, { "epoch": 0.15460544354470176, "grad_norm": 5.979901552116303, "learning_rate": 9.598445998414668e-06, "loss": 17.2934, "step": 8458 }, { "epoch": 0.1546237227411483, "grad_norm": 7.905131329432131, "learning_rate": 9.598329762272571e-06, "loss": 18.2834, "step": 8459 }, { "epoch": 0.15464200193759484, "grad_norm": 7.238376643054584, "learning_rate": 9.598213510013728e-06, "loss": 17.7917, "step": 8460 }, { "epoch": 0.15466028113404134, "grad_norm": 8.377683188548191, "learning_rate": 9.598097241638544e-06, "loss": 18.7146, "step": 8461 }, { "epoch": 0.15467856033048788, "grad_norm": 8.320108141643493, "learning_rate": 9.59798095714743e-06, "loss": 18.2979, "step": 8462 }, { "epoch": 0.1546968395269344, "grad_norm": 5.638796431764524, "learning_rate": 9.597864656540789e-06, "loss": 17.1185, "step": 8463 }, { "epoch": 0.15471511872338092, "grad_norm": 7.106385638757528, "learning_rate": 9.597748339819035e-06, "loss": 17.7804, "step": 8464 }, { "epoch": 0.15473339791982743, "grad_norm": 6.978364375188652, "learning_rate": 9.597632006982569e-06, "loss": 17.5941, "step": 8465 }, { "epoch": 0.15475167711627397, "grad_norm": 5.698546235618529, "learning_rate": 9.597515658031804e-06, "loss": 17.3585, "step": 8466 }, { "epoch": 0.1547699563127205, "grad_norm": 8.112020435876998, "learning_rate": 9.597399292967144e-06, "loss": 18.0897, "step": 8467 }, { "epoch": 0.154788235509167, "grad_norm": 7.282222268329545, "learning_rate": 9.597282911789e-06, "loss": 18.0584, "step": 8468 }, { "epoch": 0.15480651470561355, "grad_norm": 7.765696488224575, "learning_rate": 9.597166514497777e-06, "loss": 17.7791, "step": 8469 }, { "epoch": 0.15482479390206005, "grad_norm": 6.515923023581861, "learning_rate": 9.597050101093885e-06, "loss": 17.3934, "step": 8470 }, { "epoch": 0.1548430730985066, "grad_norm": 7.251663888841731, "learning_rate": 9.596933671577731e-06, "loss": 17.911, "step": 8471 }, { "epoch": 0.15486135229495313, "grad_norm": 6.547575766660995, "learning_rate": 9.596817225949722e-06, "loss": 17.2965, "step": 8472 }, { "epoch": 0.15487963149139963, "grad_norm": 7.60869051294314, "learning_rate": 9.59670076421027e-06, "loss": 17.9197, "step": 8473 }, { "epoch": 0.15489791068784617, "grad_norm": 6.14338822215821, "learning_rate": 9.59658428635978e-06, "loss": 17.379, "step": 8474 }, { "epoch": 0.15491618988429268, "grad_norm": 6.5197838892209194, "learning_rate": 9.59646779239866e-06, "loss": 17.4835, "step": 8475 }, { "epoch": 0.1549344690807392, "grad_norm": 6.654093881851663, "learning_rate": 9.59635128232732e-06, "loss": 17.7212, "step": 8476 }, { "epoch": 0.15495274827718575, "grad_norm": 7.3700092470496195, "learning_rate": 9.596234756146167e-06, "loss": 17.4965, "step": 8477 }, { "epoch": 0.15497102747363226, "grad_norm": 6.659004832144627, "learning_rate": 9.596118213855611e-06, "loss": 17.415, "step": 8478 }, { "epoch": 0.1549893066700788, "grad_norm": 6.1720652632126685, "learning_rate": 9.596001655456059e-06, "loss": 17.6741, "step": 8479 }, { "epoch": 0.1550075858665253, "grad_norm": 7.032000921413487, "learning_rate": 9.59588508094792e-06, "loss": 18.3352, "step": 8480 }, { "epoch": 0.15502586506297183, "grad_norm": 6.243013315635809, "learning_rate": 9.595768490331603e-06, "loss": 17.2966, "step": 8481 }, { "epoch": 0.15504414425941834, "grad_norm": 7.426378919149949, "learning_rate": 9.595651883607514e-06, "loss": 17.4242, "step": 8482 }, { "epoch": 0.15506242345586488, "grad_norm": 7.1075562450734235, "learning_rate": 9.595535260776066e-06, "loss": 17.8095, "step": 8483 }, { "epoch": 0.15508070265231141, "grad_norm": 5.365278674569126, "learning_rate": 9.595418621837667e-06, "loss": 17.0794, "step": 8484 }, { "epoch": 0.15509898184875792, "grad_norm": 6.349419628952927, "learning_rate": 9.595301966792722e-06, "loss": 17.5486, "step": 8485 }, { "epoch": 0.15511726104520446, "grad_norm": 7.738224246855898, "learning_rate": 9.595185295641644e-06, "loss": 18.1471, "step": 8486 }, { "epoch": 0.15513554024165097, "grad_norm": 7.300456034374533, "learning_rate": 9.59506860838484e-06, "loss": 17.767, "step": 8487 }, { "epoch": 0.1551538194380975, "grad_norm": 6.674806091221157, "learning_rate": 9.594951905022718e-06, "loss": 17.3993, "step": 8488 }, { "epoch": 0.15517209863454404, "grad_norm": 5.689164721165032, "learning_rate": 9.594835185555688e-06, "loss": 17.0957, "step": 8489 }, { "epoch": 0.15519037783099054, "grad_norm": 7.221146427191833, "learning_rate": 9.594718449984162e-06, "loss": 17.7497, "step": 8490 }, { "epoch": 0.15520865702743708, "grad_norm": 6.531405122433829, "learning_rate": 9.594601698308545e-06, "loss": 17.5837, "step": 8491 }, { "epoch": 0.1552269362238836, "grad_norm": 7.893076460747014, "learning_rate": 9.594484930529248e-06, "loss": 18.0133, "step": 8492 }, { "epoch": 0.15524521542033012, "grad_norm": 6.635984489532212, "learning_rate": 9.59436814664668e-06, "loss": 17.7828, "step": 8493 }, { "epoch": 0.15526349461677666, "grad_norm": 7.302024162710131, "learning_rate": 9.594251346661249e-06, "loss": 17.8628, "step": 8494 }, { "epoch": 0.15528177381322317, "grad_norm": 6.549778065930616, "learning_rate": 9.594134530573367e-06, "loss": 17.3769, "step": 8495 }, { "epoch": 0.1553000530096697, "grad_norm": 7.195584202554672, "learning_rate": 9.594017698383442e-06, "loss": 17.7915, "step": 8496 }, { "epoch": 0.1553183322061162, "grad_norm": 9.34243831281511, "learning_rate": 9.593900850091885e-06, "loss": 18.2791, "step": 8497 }, { "epoch": 0.15533661140256275, "grad_norm": 5.988360475293336, "learning_rate": 9.593783985699101e-06, "loss": 17.5919, "step": 8498 }, { "epoch": 0.15535489059900925, "grad_norm": 6.715506159691648, "learning_rate": 9.593667105205506e-06, "loss": 17.8342, "step": 8499 }, { "epoch": 0.1553731697954558, "grad_norm": 6.318911904885308, "learning_rate": 9.593550208611505e-06, "loss": 17.4947, "step": 8500 }, { "epoch": 0.15539144899190233, "grad_norm": 8.154867457940789, "learning_rate": 9.59343329591751e-06, "loss": 18.4076, "step": 8501 }, { "epoch": 0.15540972818834883, "grad_norm": 6.3570347394540265, "learning_rate": 9.593316367123928e-06, "loss": 17.2772, "step": 8502 }, { "epoch": 0.15542800738479537, "grad_norm": 8.662408364427396, "learning_rate": 9.593199422231173e-06, "loss": 18.6454, "step": 8503 }, { "epoch": 0.15544628658124188, "grad_norm": 6.591927918941316, "learning_rate": 9.59308246123965e-06, "loss": 17.6183, "step": 8504 }, { "epoch": 0.1554645657776884, "grad_norm": 7.350871149854708, "learning_rate": 9.592965484149772e-06, "loss": 17.6501, "step": 8505 }, { "epoch": 0.15548284497413495, "grad_norm": 6.2228465194042615, "learning_rate": 9.59284849096195e-06, "loss": 17.3951, "step": 8506 }, { "epoch": 0.15550112417058146, "grad_norm": 7.340207689427119, "learning_rate": 9.592731481676592e-06, "loss": 17.8985, "step": 8507 }, { "epoch": 0.155519403367028, "grad_norm": 5.860694243527434, "learning_rate": 9.59261445629411e-06, "loss": 17.2409, "step": 8508 }, { "epoch": 0.1555376825634745, "grad_norm": 8.852024998275656, "learning_rate": 9.592497414814911e-06, "loss": 17.4417, "step": 8509 }, { "epoch": 0.15555596175992104, "grad_norm": 8.071592123190793, "learning_rate": 9.592380357239408e-06, "loss": 17.9372, "step": 8510 }, { "epoch": 0.15557424095636757, "grad_norm": 5.926252834765691, "learning_rate": 9.592263283568008e-06, "loss": 17.1788, "step": 8511 }, { "epoch": 0.15559252015281408, "grad_norm": 6.7011376664875915, "learning_rate": 9.592146193801127e-06, "loss": 17.6574, "step": 8512 }, { "epoch": 0.15561079934926061, "grad_norm": 6.303016037431096, "learning_rate": 9.59202908793917e-06, "loss": 17.4543, "step": 8513 }, { "epoch": 0.15562907854570712, "grad_norm": 7.412547093100808, "learning_rate": 9.59191196598255e-06, "loss": 18.2372, "step": 8514 }, { "epoch": 0.15564735774215366, "grad_norm": 6.53962961001332, "learning_rate": 9.591794827931679e-06, "loss": 17.4454, "step": 8515 }, { "epoch": 0.15566563693860017, "grad_norm": 5.892445713863878, "learning_rate": 9.591677673786963e-06, "loss": 17.3634, "step": 8516 }, { "epoch": 0.1556839161350467, "grad_norm": 6.433111180533362, "learning_rate": 9.591560503548816e-06, "loss": 17.5807, "step": 8517 }, { "epoch": 0.15570219533149324, "grad_norm": 7.891231384464062, "learning_rate": 9.591443317217647e-06, "loss": 18.1661, "step": 8518 }, { "epoch": 0.15572047452793975, "grad_norm": 6.588605284400302, "learning_rate": 9.591326114793871e-06, "loss": 17.68, "step": 8519 }, { "epoch": 0.15573875372438628, "grad_norm": 6.726872242898803, "learning_rate": 9.591208896277892e-06, "loss": 17.7196, "step": 8520 }, { "epoch": 0.1557570329208328, "grad_norm": 6.676150957695583, "learning_rate": 9.591091661670125e-06, "loss": 17.7211, "step": 8521 }, { "epoch": 0.15577531211727932, "grad_norm": 6.354196660907414, "learning_rate": 9.590974410970981e-06, "loss": 17.5494, "step": 8522 }, { "epoch": 0.15579359131372586, "grad_norm": 8.81674414046929, "learning_rate": 9.59085714418087e-06, "loss": 18.3573, "step": 8523 }, { "epoch": 0.15581187051017237, "grad_norm": 6.622294384275694, "learning_rate": 9.590739861300202e-06, "loss": 17.6632, "step": 8524 }, { "epoch": 0.1558301497066189, "grad_norm": 5.559659622657357, "learning_rate": 9.59062256232939e-06, "loss": 17.2316, "step": 8525 }, { "epoch": 0.1558484289030654, "grad_norm": 7.307350725079425, "learning_rate": 9.590505247268842e-06, "loss": 18.2184, "step": 8526 }, { "epoch": 0.15586670809951195, "grad_norm": 6.817131197650809, "learning_rate": 9.590387916118975e-06, "loss": 17.5981, "step": 8527 }, { "epoch": 0.15588498729595848, "grad_norm": 7.096014670023533, "learning_rate": 9.590270568880194e-06, "loss": 18.1983, "step": 8528 }, { "epoch": 0.155903266492405, "grad_norm": 7.992282933840935, "learning_rate": 9.590153205552914e-06, "loss": 18.0549, "step": 8529 }, { "epoch": 0.15592154568885153, "grad_norm": 6.723702128700275, "learning_rate": 9.590035826137546e-06, "loss": 17.6164, "step": 8530 }, { "epoch": 0.15593982488529803, "grad_norm": 6.69807799183464, "learning_rate": 9.5899184306345e-06, "loss": 17.5677, "step": 8531 }, { "epoch": 0.15595810408174457, "grad_norm": 7.331194963743321, "learning_rate": 9.589801019044188e-06, "loss": 17.8097, "step": 8532 }, { "epoch": 0.15597638327819108, "grad_norm": 6.734347726978225, "learning_rate": 9.589683591367022e-06, "loss": 17.5873, "step": 8533 }, { "epoch": 0.1559946624746376, "grad_norm": 11.856050505777045, "learning_rate": 9.589566147603413e-06, "loss": 17.5046, "step": 8534 }, { "epoch": 0.15601294167108415, "grad_norm": 7.222427471083795, "learning_rate": 9.589448687753773e-06, "loss": 17.6746, "step": 8535 }, { "epoch": 0.15603122086753066, "grad_norm": 7.3315001487201705, "learning_rate": 9.589331211818515e-06, "loss": 17.8191, "step": 8536 }, { "epoch": 0.1560495000639772, "grad_norm": 7.039387644346871, "learning_rate": 9.589213719798048e-06, "loss": 17.7906, "step": 8537 }, { "epoch": 0.1560677792604237, "grad_norm": 6.956771012404695, "learning_rate": 9.589096211692785e-06, "loss": 17.8115, "step": 8538 }, { "epoch": 0.15608605845687024, "grad_norm": 7.537196469773764, "learning_rate": 9.588978687503139e-06, "loss": 18.0115, "step": 8539 }, { "epoch": 0.15610433765331677, "grad_norm": 7.644245909337144, "learning_rate": 9.588861147229521e-06, "loss": 17.8212, "step": 8540 }, { "epoch": 0.15612261684976328, "grad_norm": 8.022592212437466, "learning_rate": 9.58874359087234e-06, "loss": 18.2235, "step": 8541 }, { "epoch": 0.15614089604620981, "grad_norm": 7.173768077180046, "learning_rate": 9.588626018432014e-06, "loss": 17.6495, "step": 8542 }, { "epoch": 0.15615917524265632, "grad_norm": 6.66266495172659, "learning_rate": 9.58850842990895e-06, "loss": 17.455, "step": 8543 }, { "epoch": 0.15617745443910286, "grad_norm": 5.842986755428583, "learning_rate": 9.588390825303564e-06, "loss": 17.1366, "step": 8544 }, { "epoch": 0.1561957336355494, "grad_norm": 7.2621323602948715, "learning_rate": 9.588273204616266e-06, "loss": 17.7071, "step": 8545 }, { "epoch": 0.1562140128319959, "grad_norm": 5.88392891932802, "learning_rate": 9.588155567847469e-06, "loss": 17.0827, "step": 8546 }, { "epoch": 0.15623229202844244, "grad_norm": 7.344325399835583, "learning_rate": 9.588037914997582e-06, "loss": 17.7232, "step": 8547 }, { "epoch": 0.15625057122488895, "grad_norm": 5.859663652010187, "learning_rate": 9.587920246067022e-06, "loss": 17.3981, "step": 8548 }, { "epoch": 0.15626885042133548, "grad_norm": 6.56078940492395, "learning_rate": 9.5878025610562e-06, "loss": 17.5337, "step": 8549 }, { "epoch": 0.156287129617782, "grad_norm": 7.681235712099995, "learning_rate": 9.587684859965529e-06, "loss": 17.8948, "step": 8550 }, { "epoch": 0.15630540881422852, "grad_norm": 7.524388588177178, "learning_rate": 9.587567142795419e-06, "loss": 17.806, "step": 8551 }, { "epoch": 0.15632368801067506, "grad_norm": 8.154780063802253, "learning_rate": 9.587449409546284e-06, "loss": 18.2139, "step": 8552 }, { "epoch": 0.15634196720712157, "grad_norm": 8.234716189725752, "learning_rate": 9.587331660218537e-06, "loss": 17.9248, "step": 8553 }, { "epoch": 0.1563602464035681, "grad_norm": 6.719842263902505, "learning_rate": 9.587213894812593e-06, "loss": 17.6152, "step": 8554 }, { "epoch": 0.1563785256000146, "grad_norm": 6.783205588697717, "learning_rate": 9.58709611332886e-06, "loss": 17.7465, "step": 8555 }, { "epoch": 0.15639680479646115, "grad_norm": 7.795942526111688, "learning_rate": 9.586978315767755e-06, "loss": 18.381, "step": 8556 }, { "epoch": 0.15641508399290768, "grad_norm": 7.623458633933016, "learning_rate": 9.586860502129686e-06, "loss": 17.8816, "step": 8557 }, { "epoch": 0.1564333631893542, "grad_norm": 6.789083733935318, "learning_rate": 9.586742672415073e-06, "loss": 17.8753, "step": 8558 }, { "epoch": 0.15645164238580073, "grad_norm": 7.157241078959216, "learning_rate": 9.586624826624322e-06, "loss": 17.9102, "step": 8559 }, { "epoch": 0.15646992158224723, "grad_norm": 5.642902572370437, "learning_rate": 9.586506964757849e-06, "loss": 17.266, "step": 8560 }, { "epoch": 0.15648820077869377, "grad_norm": 7.0361229462686525, "learning_rate": 9.586389086816068e-06, "loss": 17.6732, "step": 8561 }, { "epoch": 0.1565064799751403, "grad_norm": 8.400883349565074, "learning_rate": 9.586271192799392e-06, "loss": 18.2939, "step": 8562 }, { "epoch": 0.1565247591715868, "grad_norm": 7.9279995310918, "learning_rate": 9.586153282708233e-06, "loss": 18.1139, "step": 8563 }, { "epoch": 0.15654303836803335, "grad_norm": 5.758986063647926, "learning_rate": 9.586035356543005e-06, "loss": 17.2393, "step": 8564 }, { "epoch": 0.15656131756447986, "grad_norm": 6.874973190523697, "learning_rate": 9.585917414304119e-06, "loss": 17.6617, "step": 8565 }, { "epoch": 0.1565795967609264, "grad_norm": 8.324872630273362, "learning_rate": 9.58579945599199e-06, "loss": 18.464, "step": 8566 }, { "epoch": 0.1565978759573729, "grad_norm": 7.410527394539033, "learning_rate": 9.585681481607035e-06, "loss": 18.2697, "step": 8567 }, { "epoch": 0.15661615515381944, "grad_norm": 7.343080238999604, "learning_rate": 9.585563491149663e-06, "loss": 18.1351, "step": 8568 }, { "epoch": 0.15663443435026597, "grad_norm": 7.575183517132464, "learning_rate": 9.585445484620288e-06, "loss": 17.939, "step": 8569 }, { "epoch": 0.15665271354671248, "grad_norm": 8.1357613990169, "learning_rate": 9.585327462019327e-06, "loss": 18.5413, "step": 8570 }, { "epoch": 0.15667099274315902, "grad_norm": 6.779577490105369, "learning_rate": 9.585209423347188e-06, "loss": 17.4974, "step": 8571 }, { "epoch": 0.15668927193960552, "grad_norm": 8.07894326475309, "learning_rate": 9.58509136860429e-06, "loss": 17.9928, "step": 8572 }, { "epoch": 0.15670755113605206, "grad_norm": 7.0722506209810945, "learning_rate": 9.584973297791045e-06, "loss": 17.9897, "step": 8573 }, { "epoch": 0.1567258303324986, "grad_norm": 6.438500881550452, "learning_rate": 9.584855210907864e-06, "loss": 17.5617, "step": 8574 }, { "epoch": 0.1567441095289451, "grad_norm": 6.270999339681938, "learning_rate": 9.584737107955165e-06, "loss": 17.4895, "step": 8575 }, { "epoch": 0.15676238872539164, "grad_norm": 7.517518615524435, "learning_rate": 9.58461898893336e-06, "loss": 17.9209, "step": 8576 }, { "epoch": 0.15678066792183815, "grad_norm": 6.540675387314155, "learning_rate": 9.584500853842865e-06, "loss": 17.7416, "step": 8577 }, { "epoch": 0.15679894711828468, "grad_norm": 6.896809487961548, "learning_rate": 9.58438270268409e-06, "loss": 18.0654, "step": 8578 }, { "epoch": 0.15681722631473122, "grad_norm": 6.9106355894783436, "learning_rate": 9.58426453545745e-06, "loss": 17.3348, "step": 8579 }, { "epoch": 0.15683550551117773, "grad_norm": 6.73826832223587, "learning_rate": 9.584146352163365e-06, "loss": 17.4553, "step": 8580 }, { "epoch": 0.15685378470762426, "grad_norm": 7.9892625504157015, "learning_rate": 9.58402815280224e-06, "loss": 18.3169, "step": 8581 }, { "epoch": 0.15687206390407077, "grad_norm": 6.190261729230549, "learning_rate": 9.583909937374498e-06, "loss": 17.3757, "step": 8582 }, { "epoch": 0.1568903431005173, "grad_norm": 6.502355971704945, "learning_rate": 9.583791705880548e-06, "loss": 17.386, "step": 8583 }, { "epoch": 0.1569086222969638, "grad_norm": 5.691456524327374, "learning_rate": 9.583673458320806e-06, "loss": 17.205, "step": 8584 }, { "epoch": 0.15692690149341035, "grad_norm": 8.101221139386617, "learning_rate": 9.583555194695686e-06, "loss": 18.3278, "step": 8585 }, { "epoch": 0.15694518068985688, "grad_norm": 7.13921274085771, "learning_rate": 9.583436915005602e-06, "loss": 17.7824, "step": 8586 }, { "epoch": 0.1569634598863034, "grad_norm": 7.508935075788792, "learning_rate": 9.583318619250973e-06, "loss": 17.8597, "step": 8587 }, { "epoch": 0.15698173908274993, "grad_norm": 6.290705774851627, "learning_rate": 9.583200307432206e-06, "loss": 17.4956, "step": 8588 }, { "epoch": 0.15700001827919643, "grad_norm": 7.18031074118022, "learning_rate": 9.58308197954972e-06, "loss": 17.493, "step": 8589 }, { "epoch": 0.15701829747564297, "grad_norm": 7.055950999653975, "learning_rate": 9.58296363560393e-06, "loss": 17.7738, "step": 8590 }, { "epoch": 0.1570365766720895, "grad_norm": 6.898125200720675, "learning_rate": 9.582845275595252e-06, "loss": 17.6118, "step": 8591 }, { "epoch": 0.15705485586853601, "grad_norm": 6.822839256959291, "learning_rate": 9.582726899524096e-06, "loss": 17.5974, "step": 8592 }, { "epoch": 0.15707313506498255, "grad_norm": 7.498924502404051, "learning_rate": 9.582608507390883e-06, "loss": 17.9733, "step": 8593 }, { "epoch": 0.15709141426142906, "grad_norm": 6.109250911940779, "learning_rate": 9.582490099196023e-06, "loss": 17.4529, "step": 8594 }, { "epoch": 0.1571096934578756, "grad_norm": 7.184334856210555, "learning_rate": 9.582371674939932e-06, "loss": 17.7924, "step": 8595 }, { "epoch": 0.15712797265432213, "grad_norm": 6.6232562938971755, "learning_rate": 9.582253234623027e-06, "loss": 17.388, "step": 8596 }, { "epoch": 0.15714625185076864, "grad_norm": 7.165670758294181, "learning_rate": 9.582134778245722e-06, "loss": 17.7685, "step": 8597 }, { "epoch": 0.15716453104721517, "grad_norm": 6.425743050384466, "learning_rate": 9.582016305808433e-06, "loss": 17.3309, "step": 8598 }, { "epoch": 0.15718281024366168, "grad_norm": 6.040470941431053, "learning_rate": 9.581897817311571e-06, "loss": 17.4582, "step": 8599 }, { "epoch": 0.15720108944010822, "grad_norm": 6.525849640296614, "learning_rate": 9.58177931275556e-06, "loss": 17.5882, "step": 8600 }, { "epoch": 0.15721936863655472, "grad_norm": 6.830962980849063, "learning_rate": 9.581660792140807e-06, "loss": 17.5274, "step": 8601 }, { "epoch": 0.15723764783300126, "grad_norm": 5.97855634507643, "learning_rate": 9.58154225546773e-06, "loss": 17.4051, "step": 8602 }, { "epoch": 0.1572559270294478, "grad_norm": 8.005231048138352, "learning_rate": 9.581423702736747e-06, "loss": 18.2805, "step": 8603 }, { "epoch": 0.1572742062258943, "grad_norm": 6.965833565128532, "learning_rate": 9.581305133948269e-06, "loss": 17.9589, "step": 8604 }, { "epoch": 0.15729248542234084, "grad_norm": 8.307702529661622, "learning_rate": 9.581186549102717e-06, "loss": 18.2579, "step": 8605 }, { "epoch": 0.15731076461878735, "grad_norm": 6.439174824316372, "learning_rate": 9.581067948200503e-06, "loss": 17.3737, "step": 8606 }, { "epoch": 0.15732904381523388, "grad_norm": 7.123205864341623, "learning_rate": 9.580949331242042e-06, "loss": 17.5323, "step": 8607 }, { "epoch": 0.15734732301168042, "grad_norm": 7.049663918196313, "learning_rate": 9.58083069822775e-06, "loss": 17.8473, "step": 8608 }, { "epoch": 0.15736560220812693, "grad_norm": 6.139725136135639, "learning_rate": 9.580712049158046e-06, "loss": 17.2127, "step": 8609 }, { "epoch": 0.15738388140457346, "grad_norm": 8.677418539267073, "learning_rate": 9.580593384033343e-06, "loss": 18.3846, "step": 8610 }, { "epoch": 0.15740216060101997, "grad_norm": 7.20631993395279, "learning_rate": 9.580474702854058e-06, "loss": 18.1721, "step": 8611 }, { "epoch": 0.1574204397974665, "grad_norm": 6.5505428804457795, "learning_rate": 9.580356005620608e-06, "loss": 17.4097, "step": 8612 }, { "epoch": 0.15743871899391304, "grad_norm": 7.123841848182562, "learning_rate": 9.580237292333406e-06, "loss": 17.8687, "step": 8613 }, { "epoch": 0.15745699819035955, "grad_norm": 6.39697252165992, "learning_rate": 9.580118562992868e-06, "loss": 17.3785, "step": 8614 }, { "epoch": 0.15747527738680608, "grad_norm": 7.070870038480495, "learning_rate": 9.579999817599415e-06, "loss": 17.692, "step": 8615 }, { "epoch": 0.1574935565832526, "grad_norm": 6.26053032908961, "learning_rate": 9.579881056153459e-06, "loss": 17.2914, "step": 8616 }, { "epoch": 0.15751183577969913, "grad_norm": 6.697145280584922, "learning_rate": 9.579762278655417e-06, "loss": 17.5871, "step": 8617 }, { "epoch": 0.15753011497614564, "grad_norm": 7.715165824108414, "learning_rate": 9.579643485105706e-06, "loss": 18.0104, "step": 8618 }, { "epoch": 0.15754839417259217, "grad_norm": 6.722127511425423, "learning_rate": 9.579524675504743e-06, "loss": 17.7836, "step": 8619 }, { "epoch": 0.1575666733690387, "grad_norm": 8.465236155753898, "learning_rate": 9.579405849852942e-06, "loss": 18.3863, "step": 8620 }, { "epoch": 0.15758495256548521, "grad_norm": 7.765050473720229, "learning_rate": 9.579287008150721e-06, "loss": 18.0953, "step": 8621 }, { "epoch": 0.15760323176193175, "grad_norm": 7.757385280429804, "learning_rate": 9.579168150398496e-06, "loss": 18.1099, "step": 8622 }, { "epoch": 0.15762151095837826, "grad_norm": 6.575447720202369, "learning_rate": 9.579049276596684e-06, "loss": 17.8145, "step": 8623 }, { "epoch": 0.1576397901548248, "grad_norm": 7.214456986634153, "learning_rate": 9.578930386745704e-06, "loss": 17.5937, "step": 8624 }, { "epoch": 0.15765806935127133, "grad_norm": 6.66633688657013, "learning_rate": 9.578811480845968e-06, "loss": 17.6349, "step": 8625 }, { "epoch": 0.15767634854771784, "grad_norm": 8.18054623746064, "learning_rate": 9.578692558897895e-06, "loss": 18.2933, "step": 8626 }, { "epoch": 0.15769462774416437, "grad_norm": 5.721921894328776, "learning_rate": 9.578573620901903e-06, "loss": 17.0205, "step": 8627 }, { "epoch": 0.15771290694061088, "grad_norm": 6.523186426909358, "learning_rate": 9.578454666858408e-06, "loss": 17.5072, "step": 8628 }, { "epoch": 0.15773118613705742, "grad_norm": 6.5150941377836205, "learning_rate": 9.578335696767825e-06, "loss": 17.5661, "step": 8629 }, { "epoch": 0.15774946533350395, "grad_norm": 6.904503266798888, "learning_rate": 9.578216710630574e-06, "loss": 17.4964, "step": 8630 }, { "epoch": 0.15776774452995046, "grad_norm": 6.901044540590434, "learning_rate": 9.57809770844707e-06, "loss": 17.551, "step": 8631 }, { "epoch": 0.157786023726397, "grad_norm": 6.8625124003165885, "learning_rate": 9.577978690217732e-06, "loss": 17.514, "step": 8632 }, { "epoch": 0.1578043029228435, "grad_norm": 6.79711657068216, "learning_rate": 9.577859655942975e-06, "loss": 17.5851, "step": 8633 }, { "epoch": 0.15782258211929004, "grad_norm": 7.428381409221911, "learning_rate": 9.577740605623218e-06, "loss": 18.121, "step": 8634 }, { "epoch": 0.15784086131573655, "grad_norm": 9.082209566902788, "learning_rate": 9.577621539258876e-06, "loss": 18.3479, "step": 8635 }, { "epoch": 0.15785914051218308, "grad_norm": 6.739861152037539, "learning_rate": 9.577502456850368e-06, "loss": 17.7048, "step": 8636 }, { "epoch": 0.15787741970862962, "grad_norm": 6.52170095545089, "learning_rate": 9.577383358398111e-06, "loss": 17.3415, "step": 8637 }, { "epoch": 0.15789569890507613, "grad_norm": 6.253937057566765, "learning_rate": 9.577264243902524e-06, "loss": 17.3816, "step": 8638 }, { "epoch": 0.15791397810152266, "grad_norm": 6.5586503017309035, "learning_rate": 9.577145113364022e-06, "loss": 17.7927, "step": 8639 }, { "epoch": 0.15793225729796917, "grad_norm": 6.822586486919763, "learning_rate": 9.577025966783025e-06, "loss": 17.5244, "step": 8640 }, { "epoch": 0.1579505364944157, "grad_norm": 6.156764339679498, "learning_rate": 9.576906804159947e-06, "loss": 17.5124, "step": 8641 }, { "epoch": 0.15796881569086224, "grad_norm": 7.525565901915947, "learning_rate": 9.57678762549521e-06, "loss": 17.9959, "step": 8642 }, { "epoch": 0.15798709488730875, "grad_norm": 7.573590715549222, "learning_rate": 9.576668430789227e-06, "loss": 17.8283, "step": 8643 }, { "epoch": 0.15800537408375528, "grad_norm": 5.465960370810305, "learning_rate": 9.576549220042419e-06, "loss": 17.0043, "step": 8644 }, { "epoch": 0.1580236532802018, "grad_norm": 7.507270295043513, "learning_rate": 9.576429993255203e-06, "loss": 18.4469, "step": 8645 }, { "epoch": 0.15804193247664833, "grad_norm": 8.381314359700884, "learning_rate": 9.576310750427998e-06, "loss": 18.4688, "step": 8646 }, { "epoch": 0.15806021167309486, "grad_norm": 5.195004795725945, "learning_rate": 9.57619149156122e-06, "loss": 17.1027, "step": 8647 }, { "epoch": 0.15807849086954137, "grad_norm": 6.5466336494995305, "learning_rate": 9.57607221665529e-06, "loss": 17.6005, "step": 8648 }, { "epoch": 0.1580967700659879, "grad_norm": 7.650693031787093, "learning_rate": 9.57595292571062e-06, "loss": 17.7174, "step": 8649 }, { "epoch": 0.15811504926243442, "grad_norm": 7.524522513133388, "learning_rate": 9.575833618727637e-06, "loss": 18.1352, "step": 8650 }, { "epoch": 0.15813332845888095, "grad_norm": 7.076198658972048, "learning_rate": 9.575714295706751e-06, "loss": 17.7296, "step": 8651 }, { "epoch": 0.15815160765532746, "grad_norm": 9.334436224397965, "learning_rate": 9.575594956648384e-06, "loss": 18.0572, "step": 8652 }, { "epoch": 0.158169886851774, "grad_norm": 6.865725757427967, "learning_rate": 9.575475601552955e-06, "loss": 17.5508, "step": 8653 }, { "epoch": 0.15818816604822053, "grad_norm": 5.968781954249258, "learning_rate": 9.57535623042088e-06, "loss": 17.2271, "step": 8654 }, { "epoch": 0.15820644524466704, "grad_norm": 7.927357273268601, "learning_rate": 9.575236843252578e-06, "loss": 17.9615, "step": 8655 }, { "epoch": 0.15822472444111357, "grad_norm": 6.147631388732034, "learning_rate": 9.575117440048469e-06, "loss": 17.5935, "step": 8656 }, { "epoch": 0.15824300363756008, "grad_norm": 6.664098577906064, "learning_rate": 9.574998020808969e-06, "loss": 17.3875, "step": 8657 }, { "epoch": 0.15826128283400662, "grad_norm": 7.039980596920565, "learning_rate": 9.574878585534498e-06, "loss": 17.7298, "step": 8658 }, { "epoch": 0.15827956203045315, "grad_norm": 8.43221093342362, "learning_rate": 9.574759134225476e-06, "loss": 18.1742, "step": 8659 }, { "epoch": 0.15829784122689966, "grad_norm": 7.32183485146767, "learning_rate": 9.574639666882319e-06, "loss": 17.7696, "step": 8660 }, { "epoch": 0.1583161204233462, "grad_norm": 8.225441049432984, "learning_rate": 9.574520183505447e-06, "loss": 18.4105, "step": 8661 }, { "epoch": 0.1583343996197927, "grad_norm": 7.767409983133676, "learning_rate": 9.57440068409528e-06, "loss": 18.0198, "step": 8662 }, { "epoch": 0.15835267881623924, "grad_norm": 7.541541219196348, "learning_rate": 9.574281168652234e-06, "loss": 17.8972, "step": 8663 }, { "epoch": 0.15837095801268578, "grad_norm": 7.343574019287776, "learning_rate": 9.57416163717673e-06, "loss": 17.9099, "step": 8664 }, { "epoch": 0.15838923720913228, "grad_norm": 6.989636427625274, "learning_rate": 9.574042089669186e-06, "loss": 17.8249, "step": 8665 }, { "epoch": 0.15840751640557882, "grad_norm": 7.189037334290815, "learning_rate": 9.573922526130021e-06, "loss": 18.0304, "step": 8666 }, { "epoch": 0.15842579560202533, "grad_norm": 7.552814407373591, "learning_rate": 9.573802946559656e-06, "loss": 18.1647, "step": 8667 }, { "epoch": 0.15844407479847186, "grad_norm": 7.719845968675812, "learning_rate": 9.57368335095851e-06, "loss": 18.1486, "step": 8668 }, { "epoch": 0.15846235399491837, "grad_norm": 7.165440768937146, "learning_rate": 9.573563739326997e-06, "loss": 18.0737, "step": 8669 }, { "epoch": 0.1584806331913649, "grad_norm": 7.197366971362831, "learning_rate": 9.573444111665542e-06, "loss": 17.9356, "step": 8670 }, { "epoch": 0.15849891238781144, "grad_norm": 6.445771316820499, "learning_rate": 9.573324467974562e-06, "loss": 17.5002, "step": 8671 }, { "epoch": 0.15851719158425795, "grad_norm": 7.326785099838132, "learning_rate": 9.573204808254476e-06, "loss": 17.7761, "step": 8672 }, { "epoch": 0.15853547078070448, "grad_norm": 6.1440248773733375, "learning_rate": 9.573085132505705e-06, "loss": 17.5076, "step": 8673 }, { "epoch": 0.158553749977151, "grad_norm": 7.317194130755906, "learning_rate": 9.572965440728667e-06, "loss": 17.8295, "step": 8674 }, { "epoch": 0.15857202917359753, "grad_norm": 6.759062435788617, "learning_rate": 9.572845732923781e-06, "loss": 17.4328, "step": 8675 }, { "epoch": 0.15859030837004406, "grad_norm": 6.7954543106881715, "learning_rate": 9.572726009091469e-06, "loss": 17.6332, "step": 8676 }, { "epoch": 0.15860858756649057, "grad_norm": 8.776539660855445, "learning_rate": 9.572606269232148e-06, "loss": 18.7997, "step": 8677 }, { "epoch": 0.1586268667629371, "grad_norm": 5.6294904211526005, "learning_rate": 9.572486513346239e-06, "loss": 17.1821, "step": 8678 }, { "epoch": 0.15864514595938362, "grad_norm": 6.258365595852227, "learning_rate": 9.572366741434163e-06, "loss": 17.564, "step": 8679 }, { "epoch": 0.15866342515583015, "grad_norm": 11.479545019600947, "learning_rate": 9.572246953496336e-06, "loss": 18.2172, "step": 8680 }, { "epoch": 0.1586817043522767, "grad_norm": 6.403658755230618, "learning_rate": 9.572127149533182e-06, "loss": 17.5483, "step": 8681 }, { "epoch": 0.1586999835487232, "grad_norm": 5.101281347483226, "learning_rate": 9.572007329545119e-06, "loss": 16.9884, "step": 8682 }, { "epoch": 0.15871826274516973, "grad_norm": 9.981835161632372, "learning_rate": 9.571887493532566e-06, "loss": 17.8174, "step": 8683 }, { "epoch": 0.15873654194161624, "grad_norm": 6.548260835919465, "learning_rate": 9.571767641495944e-06, "loss": 17.3436, "step": 8684 }, { "epoch": 0.15875482113806277, "grad_norm": 6.8461827153199275, "learning_rate": 9.571647773435674e-06, "loss": 17.8834, "step": 8685 }, { "epoch": 0.15877310033450928, "grad_norm": 8.502495700321242, "learning_rate": 9.571527889352174e-06, "loss": 18.5276, "step": 8686 }, { "epoch": 0.15879137953095582, "grad_norm": 6.176439201496272, "learning_rate": 9.571407989245866e-06, "loss": 17.4895, "step": 8687 }, { "epoch": 0.15880965872740235, "grad_norm": 6.022477784888412, "learning_rate": 9.571288073117171e-06, "loss": 17.6312, "step": 8688 }, { "epoch": 0.15882793792384886, "grad_norm": 7.118900886235516, "learning_rate": 9.571168140966506e-06, "loss": 17.8077, "step": 8689 }, { "epoch": 0.1588462171202954, "grad_norm": 7.404382215336522, "learning_rate": 9.571048192794297e-06, "loss": 17.7534, "step": 8690 }, { "epoch": 0.1588644963167419, "grad_norm": 7.673872954903309, "learning_rate": 9.570928228600957e-06, "loss": 18.2886, "step": 8691 }, { "epoch": 0.15888277551318844, "grad_norm": 8.213756221609747, "learning_rate": 9.570808248386911e-06, "loss": 18.2046, "step": 8692 }, { "epoch": 0.15890105470963498, "grad_norm": 6.004541473602897, "learning_rate": 9.57068825215258e-06, "loss": 17.3993, "step": 8693 }, { "epoch": 0.15891933390608148, "grad_norm": 7.67699382473935, "learning_rate": 9.570568239898383e-06, "loss": 17.8866, "step": 8694 }, { "epoch": 0.15893761310252802, "grad_norm": 7.801825759335127, "learning_rate": 9.570448211624738e-06, "loss": 17.812, "step": 8695 }, { "epoch": 0.15895589229897453, "grad_norm": 6.661143090418699, "learning_rate": 9.570328167332072e-06, "loss": 17.5459, "step": 8696 }, { "epoch": 0.15897417149542106, "grad_norm": 6.379851837271899, "learning_rate": 9.570208107020802e-06, "loss": 17.7867, "step": 8697 }, { "epoch": 0.1589924506918676, "grad_norm": 7.174447656296335, "learning_rate": 9.570088030691348e-06, "loss": 17.6017, "step": 8698 }, { "epoch": 0.1590107298883141, "grad_norm": 7.3451704570964695, "learning_rate": 9.569967938344134e-06, "loss": 18.1274, "step": 8699 }, { "epoch": 0.15902900908476064, "grad_norm": 7.1401695622395485, "learning_rate": 9.569847829979577e-06, "loss": 17.5868, "step": 8700 }, { "epoch": 0.15904728828120715, "grad_norm": 7.1894016334097985, "learning_rate": 9.5697277055981e-06, "loss": 17.7075, "step": 8701 }, { "epoch": 0.15906556747765369, "grad_norm": 6.458617217159287, "learning_rate": 9.569607565200123e-06, "loss": 17.5532, "step": 8702 }, { "epoch": 0.1590838466741002, "grad_norm": 6.825022394793046, "learning_rate": 9.56948740878607e-06, "loss": 17.9135, "step": 8703 }, { "epoch": 0.15910212587054673, "grad_norm": 6.28830097810962, "learning_rate": 9.56936723635636e-06, "loss": 17.3174, "step": 8704 }, { "epoch": 0.15912040506699326, "grad_norm": 7.956355899333767, "learning_rate": 9.569247047911414e-06, "loss": 17.7205, "step": 8705 }, { "epoch": 0.15913868426343977, "grad_norm": 6.332582650391563, "learning_rate": 9.569126843451652e-06, "loss": 17.463, "step": 8706 }, { "epoch": 0.1591569634598863, "grad_norm": 7.004815411625668, "learning_rate": 9.569006622977499e-06, "loss": 17.6234, "step": 8707 }, { "epoch": 0.15917524265633282, "grad_norm": 8.335114969107142, "learning_rate": 9.568886386489373e-06, "loss": 18.0666, "step": 8708 }, { "epoch": 0.15919352185277935, "grad_norm": 5.926383063830562, "learning_rate": 9.568766133987698e-06, "loss": 17.3391, "step": 8709 }, { "epoch": 0.1592118010492259, "grad_norm": 6.491640226766058, "learning_rate": 9.568645865472893e-06, "loss": 17.6675, "step": 8710 }, { "epoch": 0.1592300802456724, "grad_norm": 6.932616478421611, "learning_rate": 9.568525580945382e-06, "loss": 17.6239, "step": 8711 }, { "epoch": 0.15924835944211893, "grad_norm": 7.634918806624296, "learning_rate": 9.568405280405583e-06, "loss": 18.0179, "step": 8712 }, { "epoch": 0.15926663863856544, "grad_norm": 7.197427622249253, "learning_rate": 9.568284963853923e-06, "loss": 17.6312, "step": 8713 }, { "epoch": 0.15928491783501197, "grad_norm": 8.088800271659048, "learning_rate": 9.568164631290819e-06, "loss": 18.0679, "step": 8714 }, { "epoch": 0.1593031970314585, "grad_norm": 6.445077687085442, "learning_rate": 9.568044282716695e-06, "loss": 17.4712, "step": 8715 }, { "epoch": 0.15932147622790502, "grad_norm": 6.698055899410576, "learning_rate": 9.567923918131971e-06, "loss": 17.9143, "step": 8716 }, { "epoch": 0.15933975542435155, "grad_norm": 7.635732485280628, "learning_rate": 9.567803537537071e-06, "loss": 18.2345, "step": 8717 }, { "epoch": 0.15935803462079806, "grad_norm": 6.9098385174088675, "learning_rate": 9.567683140932415e-06, "loss": 17.914, "step": 8718 }, { "epoch": 0.1593763138172446, "grad_norm": 7.689218781560204, "learning_rate": 9.567562728318426e-06, "loss": 18.0113, "step": 8719 }, { "epoch": 0.1593945930136911, "grad_norm": 5.908448434642699, "learning_rate": 9.567442299695526e-06, "loss": 17.334, "step": 8720 }, { "epoch": 0.15941287221013764, "grad_norm": 7.383001977435694, "learning_rate": 9.567321855064137e-06, "loss": 18.0264, "step": 8721 }, { "epoch": 0.15943115140658418, "grad_norm": 6.0762845717344485, "learning_rate": 9.567201394424683e-06, "loss": 17.2396, "step": 8722 }, { "epoch": 0.15944943060303068, "grad_norm": 7.249071343599725, "learning_rate": 9.567080917777582e-06, "loss": 18.3273, "step": 8723 }, { "epoch": 0.15946770979947722, "grad_norm": 6.79927527377071, "learning_rate": 9.566960425123262e-06, "loss": 17.464, "step": 8724 }, { "epoch": 0.15948598899592373, "grad_norm": 6.032798301046214, "learning_rate": 9.566839916462139e-06, "loss": 17.1785, "step": 8725 }, { "epoch": 0.15950426819237026, "grad_norm": 7.2277709946029205, "learning_rate": 9.566719391794639e-06, "loss": 17.6834, "step": 8726 }, { "epoch": 0.1595225473888168, "grad_norm": 7.274076078711354, "learning_rate": 9.566598851121184e-06, "loss": 17.9066, "step": 8727 }, { "epoch": 0.1595408265852633, "grad_norm": 7.194632883119047, "learning_rate": 9.566478294442197e-06, "loss": 17.8001, "step": 8728 }, { "epoch": 0.15955910578170984, "grad_norm": 7.523618385263561, "learning_rate": 9.566357721758099e-06, "loss": 17.9979, "step": 8729 }, { "epoch": 0.15957738497815635, "grad_norm": 5.817974321190137, "learning_rate": 9.566237133069314e-06, "loss": 17.3732, "step": 8730 }, { "epoch": 0.15959566417460289, "grad_norm": 7.13909383047071, "learning_rate": 9.566116528376264e-06, "loss": 17.8938, "step": 8731 }, { "epoch": 0.15961394337104942, "grad_norm": 8.348706340397504, "learning_rate": 9.56599590767937e-06, "loss": 18.6279, "step": 8732 }, { "epoch": 0.15963222256749593, "grad_norm": 5.503249224940718, "learning_rate": 9.56587527097906e-06, "loss": 17.1792, "step": 8733 }, { "epoch": 0.15965050176394247, "grad_norm": 6.56382302545479, "learning_rate": 9.56575461827575e-06, "loss": 17.7867, "step": 8734 }, { "epoch": 0.15966878096038897, "grad_norm": 7.307967686522048, "learning_rate": 9.565633949569869e-06, "loss": 17.6034, "step": 8735 }, { "epoch": 0.1596870601568355, "grad_norm": 6.929338459157738, "learning_rate": 9.565513264861837e-06, "loss": 17.5525, "step": 8736 }, { "epoch": 0.15970533935328202, "grad_norm": 6.443022151956936, "learning_rate": 9.565392564152074e-06, "loss": 17.477, "step": 8737 }, { "epoch": 0.15972361854972855, "grad_norm": 7.241973816833101, "learning_rate": 9.56527184744101e-06, "loss": 17.5325, "step": 8738 }, { "epoch": 0.1597418977461751, "grad_norm": 6.563851454103735, "learning_rate": 9.565151114729063e-06, "loss": 17.4029, "step": 8739 }, { "epoch": 0.1597601769426216, "grad_norm": 6.508498858443748, "learning_rate": 9.565030366016656e-06, "loss": 17.6719, "step": 8740 }, { "epoch": 0.15977845613906813, "grad_norm": 6.456972815731345, "learning_rate": 9.564909601304215e-06, "loss": 17.4541, "step": 8741 }, { "epoch": 0.15979673533551464, "grad_norm": 6.163034436791755, "learning_rate": 9.564788820592162e-06, "loss": 17.3692, "step": 8742 }, { "epoch": 0.15981501453196117, "grad_norm": 5.575126929964949, "learning_rate": 9.564668023880921e-06, "loss": 17.1237, "step": 8743 }, { "epoch": 0.1598332937284077, "grad_norm": 7.64526785634227, "learning_rate": 9.564547211170914e-06, "loss": 18.0375, "step": 8744 }, { "epoch": 0.15985157292485422, "grad_norm": 7.417285485026281, "learning_rate": 9.564426382462564e-06, "loss": 17.5785, "step": 8745 }, { "epoch": 0.15986985212130075, "grad_norm": 6.4554490728201195, "learning_rate": 9.564305537756298e-06, "loss": 17.3587, "step": 8746 }, { "epoch": 0.15988813131774726, "grad_norm": 7.298964984532181, "learning_rate": 9.564184677052536e-06, "loss": 18.0229, "step": 8747 }, { "epoch": 0.1599064105141938, "grad_norm": 6.613225166292407, "learning_rate": 9.564063800351702e-06, "loss": 17.5211, "step": 8748 }, { "epoch": 0.15992468971064033, "grad_norm": 9.573566077771629, "learning_rate": 9.56394290765422e-06, "loss": 18.4496, "step": 8749 }, { "epoch": 0.15994296890708684, "grad_norm": 6.406176375922915, "learning_rate": 9.563821998960516e-06, "loss": 17.5678, "step": 8750 }, { "epoch": 0.15996124810353338, "grad_norm": 6.260061914930123, "learning_rate": 9.56370107427101e-06, "loss": 17.409, "step": 8751 }, { "epoch": 0.15997952729997988, "grad_norm": 6.754535119047297, "learning_rate": 9.56358013358613e-06, "loss": 17.56, "step": 8752 }, { "epoch": 0.15999780649642642, "grad_norm": 6.432051614070615, "learning_rate": 9.563459176906296e-06, "loss": 17.7027, "step": 8753 }, { "epoch": 0.16001608569287293, "grad_norm": 7.594418566449968, "learning_rate": 9.563338204231933e-06, "loss": 17.9458, "step": 8754 }, { "epoch": 0.16003436488931946, "grad_norm": 7.371403461236743, "learning_rate": 9.563217215563468e-06, "loss": 17.6388, "step": 8755 }, { "epoch": 0.160052644085766, "grad_norm": 8.453908317655701, "learning_rate": 9.563096210901321e-06, "loss": 18.4992, "step": 8756 }, { "epoch": 0.1600709232822125, "grad_norm": 7.239148752398614, "learning_rate": 9.562975190245917e-06, "loss": 17.2711, "step": 8757 }, { "epoch": 0.16008920247865904, "grad_norm": 7.535249967549871, "learning_rate": 9.562854153597682e-06, "loss": 18.0152, "step": 8758 }, { "epoch": 0.16010748167510555, "grad_norm": 6.377766363405279, "learning_rate": 9.56273310095704e-06, "loss": 17.6365, "step": 8759 }, { "epoch": 0.1601257608715521, "grad_norm": 5.729475222732704, "learning_rate": 9.562612032324414e-06, "loss": 17.0194, "step": 8760 }, { "epoch": 0.16014404006799862, "grad_norm": 6.904956145505646, "learning_rate": 9.562490947700228e-06, "loss": 17.6042, "step": 8761 }, { "epoch": 0.16016231926444513, "grad_norm": 5.786519415904625, "learning_rate": 9.562369847084906e-06, "loss": 17.294, "step": 8762 }, { "epoch": 0.16018059846089167, "grad_norm": 6.966187542125642, "learning_rate": 9.562248730478875e-06, "loss": 17.7972, "step": 8763 }, { "epoch": 0.16019887765733817, "grad_norm": 6.957106629142053, "learning_rate": 9.56212759788256e-06, "loss": 17.7768, "step": 8764 }, { "epoch": 0.1602171568537847, "grad_norm": 7.653301452414369, "learning_rate": 9.562006449296381e-06, "loss": 17.6291, "step": 8765 }, { "epoch": 0.16023543605023124, "grad_norm": 7.294945839487996, "learning_rate": 9.561885284720767e-06, "loss": 17.8276, "step": 8766 }, { "epoch": 0.16025371524667775, "grad_norm": 7.134948801613209, "learning_rate": 9.561764104156139e-06, "loss": 17.7329, "step": 8767 }, { "epoch": 0.1602719944431243, "grad_norm": 6.642006525302264, "learning_rate": 9.561642907602923e-06, "loss": 17.2391, "step": 8768 }, { "epoch": 0.1602902736395708, "grad_norm": 6.475013961772217, "learning_rate": 9.561521695061547e-06, "loss": 17.4464, "step": 8769 }, { "epoch": 0.16030855283601733, "grad_norm": 7.212239729576027, "learning_rate": 9.561400466532433e-06, "loss": 17.1043, "step": 8770 }, { "epoch": 0.16032683203246384, "grad_norm": 7.095701461594068, "learning_rate": 9.561279222016004e-06, "loss": 17.7539, "step": 8771 }, { "epoch": 0.16034511122891038, "grad_norm": 6.593081321509201, "learning_rate": 9.56115796151269e-06, "loss": 17.5958, "step": 8772 }, { "epoch": 0.1603633904253569, "grad_norm": 8.66286614955099, "learning_rate": 9.561036685022911e-06, "loss": 18.3973, "step": 8773 }, { "epoch": 0.16038166962180342, "grad_norm": 7.398229057452632, "learning_rate": 9.560915392547095e-06, "loss": 17.97, "step": 8774 }, { "epoch": 0.16039994881824995, "grad_norm": 7.52899839892259, "learning_rate": 9.560794084085667e-06, "loss": 18.0149, "step": 8775 }, { "epoch": 0.16041822801469646, "grad_norm": 7.0884022559299735, "learning_rate": 9.560672759639052e-06, "loss": 17.6417, "step": 8776 }, { "epoch": 0.160436507211143, "grad_norm": 7.6345269325657314, "learning_rate": 9.560551419207673e-06, "loss": 18.2399, "step": 8777 }, { "epoch": 0.16045478640758953, "grad_norm": 6.786548576572578, "learning_rate": 9.560430062791956e-06, "loss": 17.7836, "step": 8778 }, { "epoch": 0.16047306560403604, "grad_norm": 7.630147309540174, "learning_rate": 9.560308690392331e-06, "loss": 17.9565, "step": 8779 }, { "epoch": 0.16049134480048258, "grad_norm": 9.243567669339185, "learning_rate": 9.560187302009216e-06, "loss": 18.5087, "step": 8780 }, { "epoch": 0.16050962399692908, "grad_norm": 6.68852694381934, "learning_rate": 9.560065897643043e-06, "loss": 17.4963, "step": 8781 }, { "epoch": 0.16052790319337562, "grad_norm": 7.723848633136946, "learning_rate": 9.559944477294235e-06, "loss": 17.9461, "step": 8782 }, { "epoch": 0.16054618238982216, "grad_norm": 6.027865934325465, "learning_rate": 9.559823040963214e-06, "loss": 17.4491, "step": 8783 }, { "epoch": 0.16056446158626866, "grad_norm": 6.525364454120052, "learning_rate": 9.55970158865041e-06, "loss": 17.6133, "step": 8784 }, { "epoch": 0.1605827407827152, "grad_norm": 6.234268240730879, "learning_rate": 9.55958012035625e-06, "loss": 17.5524, "step": 8785 }, { "epoch": 0.1606010199791617, "grad_norm": 7.6452263245109515, "learning_rate": 9.559458636081156e-06, "loss": 17.8828, "step": 8786 }, { "epoch": 0.16061929917560824, "grad_norm": 6.805593151866449, "learning_rate": 9.559337135825555e-06, "loss": 17.8694, "step": 8787 }, { "epoch": 0.16063757837205475, "grad_norm": 7.376660941074235, "learning_rate": 9.559215619589872e-06, "loss": 17.9796, "step": 8788 }, { "epoch": 0.1606558575685013, "grad_norm": 7.114462555908165, "learning_rate": 9.559094087374535e-06, "loss": 17.6144, "step": 8789 }, { "epoch": 0.16067413676494782, "grad_norm": 6.750929925152189, "learning_rate": 9.558972539179969e-06, "loss": 17.5734, "step": 8790 }, { "epoch": 0.16069241596139433, "grad_norm": 6.7640328775666125, "learning_rate": 9.558850975006599e-06, "loss": 17.6017, "step": 8791 }, { "epoch": 0.16071069515784087, "grad_norm": 6.540873147196798, "learning_rate": 9.558729394854854e-06, "loss": 17.5003, "step": 8792 }, { "epoch": 0.16072897435428737, "grad_norm": 5.795009420922162, "learning_rate": 9.558607798725155e-06, "loss": 17.0964, "step": 8793 }, { "epoch": 0.1607472535507339, "grad_norm": 7.8149691739728, "learning_rate": 9.558486186617933e-06, "loss": 17.9489, "step": 8794 }, { "epoch": 0.16076553274718045, "grad_norm": 5.961826323415727, "learning_rate": 9.558364558533613e-06, "loss": 17.3436, "step": 8795 }, { "epoch": 0.16078381194362695, "grad_norm": 7.980379462536579, "learning_rate": 9.558242914472619e-06, "loss": 18.2505, "step": 8796 }, { "epoch": 0.1608020911400735, "grad_norm": 8.799802937393846, "learning_rate": 9.55812125443538e-06, "loss": 18.4094, "step": 8797 }, { "epoch": 0.16082037033652, "grad_norm": 7.875497214055206, "learning_rate": 9.557999578422323e-06, "loss": 17.8522, "step": 8798 }, { "epoch": 0.16083864953296653, "grad_norm": 6.825628477609388, "learning_rate": 9.55787788643387e-06, "loss": 17.6042, "step": 8799 }, { "epoch": 0.16085692872941307, "grad_norm": 6.656232687908398, "learning_rate": 9.557756178470453e-06, "loss": 17.4498, "step": 8800 }, { "epoch": 0.16087520792585958, "grad_norm": 7.452719354689855, "learning_rate": 9.557634454532495e-06, "loss": 17.791, "step": 8801 }, { "epoch": 0.1608934871223061, "grad_norm": 7.075444203805298, "learning_rate": 9.557512714620424e-06, "loss": 18.0032, "step": 8802 }, { "epoch": 0.16091176631875262, "grad_norm": 7.753044557459597, "learning_rate": 9.557390958734667e-06, "loss": 18.0721, "step": 8803 }, { "epoch": 0.16093004551519915, "grad_norm": 6.633972339969448, "learning_rate": 9.557269186875649e-06, "loss": 17.4466, "step": 8804 }, { "epoch": 0.16094832471164566, "grad_norm": 6.876666467275851, "learning_rate": 9.5571473990438e-06, "loss": 17.6804, "step": 8805 }, { "epoch": 0.1609666039080922, "grad_norm": 8.2795245562326, "learning_rate": 9.557025595239543e-06, "loss": 17.9313, "step": 8806 }, { "epoch": 0.16098488310453873, "grad_norm": 6.303225996141523, "learning_rate": 9.556903775463306e-06, "loss": 17.5084, "step": 8807 }, { "epoch": 0.16100316230098524, "grad_norm": 6.5407205829138, "learning_rate": 9.556781939715519e-06, "loss": 17.3902, "step": 8808 }, { "epoch": 0.16102144149743178, "grad_norm": 8.460848418617777, "learning_rate": 9.556660087996605e-06, "loss": 18.3657, "step": 8809 }, { "epoch": 0.16103972069387829, "grad_norm": 6.897862897657835, "learning_rate": 9.556538220306994e-06, "loss": 17.5954, "step": 8810 }, { "epoch": 0.16105799989032482, "grad_norm": 7.164626874468124, "learning_rate": 9.55641633664711e-06, "loss": 17.7571, "step": 8811 }, { "epoch": 0.16107627908677136, "grad_norm": 6.833970612165855, "learning_rate": 9.556294437017383e-06, "loss": 17.6259, "step": 8812 }, { "epoch": 0.16109455828321786, "grad_norm": 6.4596259350114025, "learning_rate": 9.556172521418241e-06, "loss": 17.577, "step": 8813 }, { "epoch": 0.1611128374796644, "grad_norm": 7.498344388477673, "learning_rate": 9.556050589850109e-06, "loss": 17.7124, "step": 8814 }, { "epoch": 0.1611311166761109, "grad_norm": 7.971377103769383, "learning_rate": 9.555928642313415e-06, "loss": 18.1817, "step": 8815 }, { "epoch": 0.16114939587255744, "grad_norm": 6.699257541796852, "learning_rate": 9.555806678808586e-06, "loss": 17.6014, "step": 8816 }, { "epoch": 0.16116767506900398, "grad_norm": 7.14970775353149, "learning_rate": 9.55568469933605e-06, "loss": 17.8008, "step": 8817 }, { "epoch": 0.1611859542654505, "grad_norm": 6.899336621313897, "learning_rate": 9.555562703896232e-06, "loss": 17.7719, "step": 8818 }, { "epoch": 0.16120423346189702, "grad_norm": 7.5932976917227295, "learning_rate": 9.555440692489566e-06, "loss": 17.9854, "step": 8819 }, { "epoch": 0.16122251265834353, "grad_norm": 7.497653414850729, "learning_rate": 9.555318665116475e-06, "loss": 17.5895, "step": 8820 }, { "epoch": 0.16124079185479007, "grad_norm": 6.501683877973553, "learning_rate": 9.555196621777385e-06, "loss": 17.3843, "step": 8821 }, { "epoch": 0.16125907105123657, "grad_norm": 6.046399938171552, "learning_rate": 9.555074562472728e-06, "loss": 17.4456, "step": 8822 }, { "epoch": 0.1612773502476831, "grad_norm": 8.333521331479742, "learning_rate": 9.554952487202929e-06, "loss": 17.9339, "step": 8823 }, { "epoch": 0.16129562944412965, "grad_norm": 7.188884164898245, "learning_rate": 9.554830395968417e-06, "loss": 17.9014, "step": 8824 }, { "epoch": 0.16131390864057615, "grad_norm": 8.20227410730392, "learning_rate": 9.55470828876962e-06, "loss": 18.1764, "step": 8825 }, { "epoch": 0.1613321878370227, "grad_norm": 7.485773635555615, "learning_rate": 9.554586165606967e-06, "loss": 17.878, "step": 8826 }, { "epoch": 0.1613504670334692, "grad_norm": 8.393810519342246, "learning_rate": 9.554464026480884e-06, "loss": 18.1257, "step": 8827 }, { "epoch": 0.16136874622991573, "grad_norm": 7.989067804682207, "learning_rate": 9.554341871391799e-06, "loss": 17.798, "step": 8828 }, { "epoch": 0.16138702542636227, "grad_norm": 7.123354701496578, "learning_rate": 9.55421970034014e-06, "loss": 18.0067, "step": 8829 }, { "epoch": 0.16140530462280878, "grad_norm": 7.798993085522868, "learning_rate": 9.554097513326338e-06, "loss": 18.1148, "step": 8830 }, { "epoch": 0.1614235838192553, "grad_norm": 4.76814135126192, "learning_rate": 9.553975310350819e-06, "loss": 16.7965, "step": 8831 }, { "epoch": 0.16144186301570182, "grad_norm": 6.4682768640768336, "learning_rate": 9.55385309141401e-06, "loss": 17.2909, "step": 8832 }, { "epoch": 0.16146014221214836, "grad_norm": 6.797191004024916, "learning_rate": 9.553730856516343e-06, "loss": 17.7865, "step": 8833 }, { "epoch": 0.1614784214085949, "grad_norm": 7.591471038689188, "learning_rate": 9.553608605658244e-06, "loss": 17.6864, "step": 8834 }, { "epoch": 0.1614967006050414, "grad_norm": 6.842949793100091, "learning_rate": 9.553486338840143e-06, "loss": 17.3557, "step": 8835 }, { "epoch": 0.16151497980148793, "grad_norm": 8.929409275541913, "learning_rate": 9.553364056062467e-06, "loss": 18.7151, "step": 8836 }, { "epoch": 0.16153325899793444, "grad_norm": 7.785349282093318, "learning_rate": 9.553241757325644e-06, "loss": 17.9587, "step": 8837 }, { "epoch": 0.16155153819438098, "grad_norm": 8.211137903475462, "learning_rate": 9.553119442630103e-06, "loss": 17.9899, "step": 8838 }, { "epoch": 0.16156981739082749, "grad_norm": 6.7164478822489375, "learning_rate": 9.552997111976275e-06, "loss": 17.529, "step": 8839 }, { "epoch": 0.16158809658727402, "grad_norm": 7.546942335017686, "learning_rate": 9.552874765364587e-06, "loss": 17.9005, "step": 8840 }, { "epoch": 0.16160637578372056, "grad_norm": 8.153689286797787, "learning_rate": 9.552752402795469e-06, "loss": 18.4123, "step": 8841 }, { "epoch": 0.16162465498016707, "grad_norm": 7.152845343535854, "learning_rate": 9.552630024269347e-06, "loss": 17.7524, "step": 8842 }, { "epoch": 0.1616429341766136, "grad_norm": 7.337910363949701, "learning_rate": 9.552507629786653e-06, "loss": 17.8114, "step": 8843 }, { "epoch": 0.1616612133730601, "grad_norm": 7.350235652986651, "learning_rate": 9.552385219347816e-06, "loss": 17.8237, "step": 8844 }, { "epoch": 0.16167949256950664, "grad_norm": 6.14665711848872, "learning_rate": 9.552262792953262e-06, "loss": 17.3868, "step": 8845 }, { "epoch": 0.16169777176595318, "grad_norm": 6.272718841791613, "learning_rate": 9.55214035060342e-06, "loss": 17.4701, "step": 8846 }, { "epoch": 0.1617160509623997, "grad_norm": 6.863264719435674, "learning_rate": 9.552017892298724e-06, "loss": 17.508, "step": 8847 }, { "epoch": 0.16173433015884622, "grad_norm": 7.136719533652093, "learning_rate": 9.551895418039601e-06, "loss": 17.4004, "step": 8848 }, { "epoch": 0.16175260935529273, "grad_norm": 7.283507678344315, "learning_rate": 9.551772927826477e-06, "loss": 17.9247, "step": 8849 }, { "epoch": 0.16177088855173927, "grad_norm": 6.05289082646221, "learning_rate": 9.551650421659786e-06, "loss": 17.0785, "step": 8850 }, { "epoch": 0.1617891677481858, "grad_norm": 8.625028740166986, "learning_rate": 9.551527899539954e-06, "loss": 17.9702, "step": 8851 }, { "epoch": 0.1618074469446323, "grad_norm": 6.3450623363035294, "learning_rate": 9.551405361467412e-06, "loss": 17.3423, "step": 8852 }, { "epoch": 0.16182572614107885, "grad_norm": 6.320029822690952, "learning_rate": 9.55128280744259e-06, "loss": 17.4594, "step": 8853 }, { "epoch": 0.16184400533752535, "grad_norm": 8.117196672477366, "learning_rate": 9.551160237465915e-06, "loss": 18.0902, "step": 8854 }, { "epoch": 0.1618622845339719, "grad_norm": 11.078199725555908, "learning_rate": 9.55103765153782e-06, "loss": 18.8538, "step": 8855 }, { "epoch": 0.1618805637304184, "grad_norm": 8.079093213144644, "learning_rate": 9.550915049658733e-06, "loss": 18.4629, "step": 8856 }, { "epoch": 0.16189884292686493, "grad_norm": 6.484534105389004, "learning_rate": 9.550792431829082e-06, "loss": 17.5436, "step": 8857 }, { "epoch": 0.16191712212331147, "grad_norm": 7.49800621285524, "learning_rate": 9.5506697980493e-06, "loss": 18.0245, "step": 8858 }, { "epoch": 0.16193540131975798, "grad_norm": 6.5500496032887545, "learning_rate": 9.550547148319814e-06, "loss": 17.3675, "step": 8859 }, { "epoch": 0.1619536805162045, "grad_norm": 9.450945561850927, "learning_rate": 9.550424482641057e-06, "loss": 18.4343, "step": 8860 }, { "epoch": 0.16197195971265102, "grad_norm": 5.6372846773157015, "learning_rate": 9.550301801013456e-06, "loss": 17.2191, "step": 8861 }, { "epoch": 0.16199023890909756, "grad_norm": 8.082414695165987, "learning_rate": 9.55017910343744e-06, "loss": 18.0178, "step": 8862 }, { "epoch": 0.1620085181055441, "grad_norm": 6.189643379768784, "learning_rate": 9.550056389913443e-06, "loss": 17.2269, "step": 8863 }, { "epoch": 0.1620267973019906, "grad_norm": 7.229971847798873, "learning_rate": 9.549933660441892e-06, "loss": 17.4883, "step": 8864 }, { "epoch": 0.16204507649843714, "grad_norm": 6.811545341412197, "learning_rate": 9.549810915023222e-06, "loss": 17.3983, "step": 8865 }, { "epoch": 0.16206335569488364, "grad_norm": 6.727419034143885, "learning_rate": 9.549688153657855e-06, "loss": 17.4161, "step": 8866 }, { "epoch": 0.16208163489133018, "grad_norm": 7.330171877370214, "learning_rate": 9.549565376346229e-06, "loss": 17.8667, "step": 8867 }, { "epoch": 0.16209991408777671, "grad_norm": 8.272764506260637, "learning_rate": 9.549442583088769e-06, "loss": 18.1347, "step": 8868 }, { "epoch": 0.16211819328422322, "grad_norm": 6.147822248842741, "learning_rate": 9.549319773885908e-06, "loss": 17.4762, "step": 8869 }, { "epoch": 0.16213647248066976, "grad_norm": 6.189533530420878, "learning_rate": 9.549196948738078e-06, "loss": 17.1608, "step": 8870 }, { "epoch": 0.16215475167711627, "grad_norm": 6.38942492187238, "learning_rate": 9.549074107645704e-06, "loss": 17.533, "step": 8871 }, { "epoch": 0.1621730308735628, "grad_norm": 5.844114971106341, "learning_rate": 9.548951250609223e-06, "loss": 17.0304, "step": 8872 }, { "epoch": 0.1621913100700093, "grad_norm": 6.919844670071196, "learning_rate": 9.54882837762906e-06, "loss": 17.6341, "step": 8873 }, { "epoch": 0.16220958926645584, "grad_norm": 6.1769616846319675, "learning_rate": 9.548705488705651e-06, "loss": 17.6437, "step": 8874 }, { "epoch": 0.16222786846290238, "grad_norm": 7.123526203979538, "learning_rate": 9.548582583839424e-06, "loss": 17.6275, "step": 8875 }, { "epoch": 0.1622461476593489, "grad_norm": 7.8622977543729, "learning_rate": 9.548459663030807e-06, "loss": 18.1613, "step": 8876 }, { "epoch": 0.16226442685579542, "grad_norm": 6.988235033949581, "learning_rate": 9.548336726280235e-06, "loss": 17.8994, "step": 8877 }, { "epoch": 0.16228270605224193, "grad_norm": 8.668390020559814, "learning_rate": 9.548213773588137e-06, "loss": 18.4199, "step": 8878 }, { "epoch": 0.16230098524868847, "grad_norm": 7.592850934071577, "learning_rate": 9.548090804954946e-06, "loss": 18.0574, "step": 8879 }, { "epoch": 0.162319264445135, "grad_norm": 6.243808839154162, "learning_rate": 9.54796782038109e-06, "loss": 17.2268, "step": 8880 }, { "epoch": 0.1623375436415815, "grad_norm": 6.92471135372571, "learning_rate": 9.547844819867002e-06, "loss": 17.7505, "step": 8881 }, { "epoch": 0.16235582283802805, "grad_norm": 8.250122694694106, "learning_rate": 9.547721803413113e-06, "loss": 18.4426, "step": 8882 }, { "epoch": 0.16237410203447455, "grad_norm": 7.90110995616912, "learning_rate": 9.547598771019853e-06, "loss": 17.9777, "step": 8883 }, { "epoch": 0.1623923812309211, "grad_norm": 7.675239492202096, "learning_rate": 9.547475722687653e-06, "loss": 18.0677, "step": 8884 }, { "epoch": 0.16241066042736763, "grad_norm": 7.2494996796705635, "learning_rate": 9.547352658416946e-06, "loss": 17.9652, "step": 8885 }, { "epoch": 0.16242893962381413, "grad_norm": 7.074103628188423, "learning_rate": 9.547229578208164e-06, "loss": 17.7379, "step": 8886 }, { "epoch": 0.16244721882026067, "grad_norm": 6.9007334560151685, "learning_rate": 9.547106482061734e-06, "loss": 17.7754, "step": 8887 }, { "epoch": 0.16246549801670718, "grad_norm": 6.010855450347364, "learning_rate": 9.546983369978093e-06, "loss": 17.1205, "step": 8888 }, { "epoch": 0.1624837772131537, "grad_norm": 7.231444756815707, "learning_rate": 9.546860241957669e-06, "loss": 17.7948, "step": 8889 }, { "epoch": 0.16250205640960022, "grad_norm": 7.131651381970469, "learning_rate": 9.546737098000893e-06, "loss": 17.9893, "step": 8890 }, { "epoch": 0.16252033560604676, "grad_norm": 7.70387932922149, "learning_rate": 9.5466139381082e-06, "loss": 18.0789, "step": 8891 }, { "epoch": 0.1625386148024933, "grad_norm": 6.632072531801575, "learning_rate": 9.546490762280018e-06, "loss": 17.6724, "step": 8892 }, { "epoch": 0.1625568939989398, "grad_norm": 6.936192890123043, "learning_rate": 9.546367570516782e-06, "loss": 17.6417, "step": 8893 }, { "epoch": 0.16257517319538634, "grad_norm": 6.956442992720794, "learning_rate": 9.546244362818922e-06, "loss": 17.681, "step": 8894 }, { "epoch": 0.16259345239183284, "grad_norm": 7.446021023217806, "learning_rate": 9.546121139186869e-06, "loss": 18.232, "step": 8895 }, { "epoch": 0.16261173158827938, "grad_norm": 7.878532797276115, "learning_rate": 9.545997899621057e-06, "loss": 17.8911, "step": 8896 }, { "epoch": 0.16263001078472591, "grad_norm": 6.543557569485853, "learning_rate": 9.545874644121915e-06, "loss": 17.519, "step": 8897 }, { "epoch": 0.16264828998117242, "grad_norm": 7.015477953441177, "learning_rate": 9.545751372689879e-06, "loss": 17.8328, "step": 8898 }, { "epoch": 0.16266656917761896, "grad_norm": 6.3181880335265665, "learning_rate": 9.545628085325378e-06, "loss": 17.2829, "step": 8899 }, { "epoch": 0.16268484837406547, "grad_norm": 6.697700055575188, "learning_rate": 9.545504782028845e-06, "loss": 17.3818, "step": 8900 }, { "epoch": 0.162703127570512, "grad_norm": 5.856440390979823, "learning_rate": 9.545381462800713e-06, "loss": 17.1333, "step": 8901 }, { "epoch": 0.16272140676695854, "grad_norm": 7.761739161167816, "learning_rate": 9.545258127641412e-06, "loss": 17.8042, "step": 8902 }, { "epoch": 0.16273968596340505, "grad_norm": 5.975151780291863, "learning_rate": 9.545134776551377e-06, "loss": 17.4304, "step": 8903 }, { "epoch": 0.16275796515985158, "grad_norm": 6.5665951275741845, "learning_rate": 9.545011409531037e-06, "loss": 17.7145, "step": 8904 }, { "epoch": 0.1627762443562981, "grad_norm": 6.733855110402917, "learning_rate": 9.544888026580827e-06, "loss": 17.099, "step": 8905 }, { "epoch": 0.16279452355274462, "grad_norm": 5.905538250052269, "learning_rate": 9.54476462770118e-06, "loss": 17.0874, "step": 8906 }, { "epoch": 0.16281280274919113, "grad_norm": 6.573048580583706, "learning_rate": 9.544641212892526e-06, "loss": 17.5165, "step": 8907 }, { "epoch": 0.16283108194563767, "grad_norm": 9.295126203116364, "learning_rate": 9.544517782155302e-06, "loss": 18.2697, "step": 8908 }, { "epoch": 0.1628493611420842, "grad_norm": 8.26365264438235, "learning_rate": 9.544394335489935e-06, "loss": 17.8954, "step": 8909 }, { "epoch": 0.1628676403385307, "grad_norm": 7.084231977177663, "learning_rate": 9.54427087289686e-06, "loss": 17.4922, "step": 8910 }, { "epoch": 0.16288591953497725, "grad_norm": 6.964164989018065, "learning_rate": 9.54414739437651e-06, "loss": 17.7176, "step": 8911 }, { "epoch": 0.16290419873142375, "grad_norm": 6.997736867494417, "learning_rate": 9.54402389992932e-06, "loss": 17.4331, "step": 8912 }, { "epoch": 0.1629224779278703, "grad_norm": 8.043540829147126, "learning_rate": 9.543900389555718e-06, "loss": 18.0513, "step": 8913 }, { "epoch": 0.16294075712431683, "grad_norm": 7.16888807126414, "learning_rate": 9.54377686325614e-06, "loss": 17.7014, "step": 8914 }, { "epoch": 0.16295903632076333, "grad_norm": 8.30871827815993, "learning_rate": 9.54365332103102e-06, "loss": 18.0837, "step": 8915 }, { "epoch": 0.16297731551720987, "grad_norm": 8.503896845917318, "learning_rate": 9.543529762880787e-06, "loss": 18.0608, "step": 8916 }, { "epoch": 0.16299559471365638, "grad_norm": 6.712750605148803, "learning_rate": 9.543406188805877e-06, "loss": 17.3239, "step": 8917 }, { "epoch": 0.1630138739101029, "grad_norm": 6.401990566639425, "learning_rate": 9.543282598806723e-06, "loss": 17.4695, "step": 8918 }, { "epoch": 0.16303215310654945, "grad_norm": 6.642985318262605, "learning_rate": 9.543158992883758e-06, "loss": 17.6847, "step": 8919 }, { "epoch": 0.16305043230299596, "grad_norm": 7.542441204187607, "learning_rate": 9.543035371037415e-06, "loss": 18.0636, "step": 8920 }, { "epoch": 0.1630687114994425, "grad_norm": 6.2766294396528375, "learning_rate": 9.542911733268126e-06, "loss": 17.5489, "step": 8921 }, { "epoch": 0.163086990695889, "grad_norm": 7.384420981284008, "learning_rate": 9.542788079576326e-06, "loss": 17.7955, "step": 8922 }, { "epoch": 0.16310526989233554, "grad_norm": 5.8974784763862935, "learning_rate": 9.54266440996245e-06, "loss": 17.3754, "step": 8923 }, { "epoch": 0.16312354908878204, "grad_norm": 6.578968888106481, "learning_rate": 9.542540724426927e-06, "loss": 17.6573, "step": 8924 }, { "epoch": 0.16314182828522858, "grad_norm": 6.860280161621239, "learning_rate": 9.542417022970194e-06, "loss": 17.6178, "step": 8925 }, { "epoch": 0.16316010748167512, "grad_norm": 6.979930012077105, "learning_rate": 9.542293305592683e-06, "loss": 17.6281, "step": 8926 }, { "epoch": 0.16317838667812162, "grad_norm": 7.599394431780828, "learning_rate": 9.54216957229483e-06, "loss": 18.0106, "step": 8927 }, { "epoch": 0.16319666587456816, "grad_norm": 7.238928157250578, "learning_rate": 9.542045823077064e-06, "loss": 18.2261, "step": 8928 }, { "epoch": 0.16321494507101467, "grad_norm": 7.766677650452347, "learning_rate": 9.541922057939823e-06, "loss": 18.0347, "step": 8929 }, { "epoch": 0.1632332242674612, "grad_norm": 7.662017576676054, "learning_rate": 9.54179827688354e-06, "loss": 17.8838, "step": 8930 }, { "epoch": 0.16325150346390774, "grad_norm": 7.232795696096888, "learning_rate": 9.541674479908647e-06, "loss": 17.866, "step": 8931 }, { "epoch": 0.16326978266035425, "grad_norm": 7.886448752917773, "learning_rate": 9.54155066701558e-06, "loss": 17.6537, "step": 8932 }, { "epoch": 0.16328806185680078, "grad_norm": 7.881010893635565, "learning_rate": 9.541426838204771e-06, "loss": 18.0129, "step": 8933 }, { "epoch": 0.1633063410532473, "grad_norm": 7.606065616458181, "learning_rate": 9.541302993476655e-06, "loss": 17.8275, "step": 8934 }, { "epoch": 0.16332462024969382, "grad_norm": 7.123188545637438, "learning_rate": 9.541179132831666e-06, "loss": 17.6886, "step": 8935 }, { "epoch": 0.16334289944614036, "grad_norm": 7.226467542992779, "learning_rate": 9.54105525627024e-06, "loss": 17.7167, "step": 8936 }, { "epoch": 0.16336117864258687, "grad_norm": 5.937890595858325, "learning_rate": 9.540931363792808e-06, "loss": 17.3241, "step": 8937 }, { "epoch": 0.1633794578390334, "grad_norm": 6.032465860912279, "learning_rate": 9.540807455399806e-06, "loss": 17.4818, "step": 8938 }, { "epoch": 0.1633977370354799, "grad_norm": 7.150257416553276, "learning_rate": 9.540683531091667e-06, "loss": 17.7593, "step": 8939 }, { "epoch": 0.16341601623192645, "grad_norm": 6.682123741954857, "learning_rate": 9.540559590868826e-06, "loss": 17.4994, "step": 8940 }, { "epoch": 0.16343429542837296, "grad_norm": 7.771814579829153, "learning_rate": 9.54043563473172e-06, "loss": 18.012, "step": 8941 }, { "epoch": 0.1634525746248195, "grad_norm": 7.097596182243404, "learning_rate": 9.540311662680779e-06, "loss": 17.7753, "step": 8942 }, { "epoch": 0.16347085382126603, "grad_norm": 7.096495466577756, "learning_rate": 9.540187674716439e-06, "loss": 17.9051, "step": 8943 }, { "epoch": 0.16348913301771253, "grad_norm": 6.006982250832487, "learning_rate": 9.540063670839138e-06, "loss": 17.148, "step": 8944 }, { "epoch": 0.16350741221415907, "grad_norm": 11.197540538981825, "learning_rate": 9.539939651049306e-06, "loss": 17.011, "step": 8945 }, { "epoch": 0.16352569141060558, "grad_norm": 7.675777563157956, "learning_rate": 9.539815615347378e-06, "loss": 18.1948, "step": 8946 }, { "epoch": 0.1635439706070521, "grad_norm": 9.277697757170023, "learning_rate": 9.539691563733793e-06, "loss": 18.748, "step": 8947 }, { "epoch": 0.16356224980349865, "grad_norm": 6.754838801733986, "learning_rate": 9.53956749620898e-06, "loss": 17.4221, "step": 8948 }, { "epoch": 0.16358052899994516, "grad_norm": 7.415932795215124, "learning_rate": 9.53944341277338e-06, "loss": 17.9688, "step": 8949 }, { "epoch": 0.1635988081963917, "grad_norm": 6.2238818314264, "learning_rate": 9.539319313427424e-06, "loss": 17.4398, "step": 8950 }, { "epoch": 0.1636170873928382, "grad_norm": 9.021443626345528, "learning_rate": 9.539195198171547e-06, "loss": 18.203, "step": 8951 }, { "epoch": 0.16363536658928474, "grad_norm": 9.027950393619902, "learning_rate": 9.539071067006185e-06, "loss": 18.3249, "step": 8952 }, { "epoch": 0.16365364578573127, "grad_norm": 7.381196928707877, "learning_rate": 9.538946919931773e-06, "loss": 18.3222, "step": 8953 }, { "epoch": 0.16367192498217778, "grad_norm": 7.174947326296616, "learning_rate": 9.538822756948746e-06, "loss": 17.8498, "step": 8954 }, { "epoch": 0.16369020417862432, "grad_norm": 6.901302070754817, "learning_rate": 9.538698578057538e-06, "loss": 17.7299, "step": 8955 }, { "epoch": 0.16370848337507082, "grad_norm": 7.761207539143965, "learning_rate": 9.538574383258586e-06, "loss": 18.3238, "step": 8956 }, { "epoch": 0.16372676257151736, "grad_norm": 7.44744965291629, "learning_rate": 9.538450172552324e-06, "loss": 17.5281, "step": 8957 }, { "epoch": 0.16374504176796387, "grad_norm": 6.987075909306739, "learning_rate": 9.53832594593919e-06, "loss": 17.7534, "step": 8958 }, { "epoch": 0.1637633209644104, "grad_norm": 6.624246707335277, "learning_rate": 9.538201703419616e-06, "loss": 17.6023, "step": 8959 }, { "epoch": 0.16378160016085694, "grad_norm": 7.097707090816197, "learning_rate": 9.538077444994039e-06, "loss": 18.0419, "step": 8960 }, { "epoch": 0.16379987935730345, "grad_norm": 7.760668279565784, "learning_rate": 9.537953170662894e-06, "loss": 17.9141, "step": 8961 }, { "epoch": 0.16381815855374998, "grad_norm": 8.699347616373558, "learning_rate": 9.537828880426617e-06, "loss": 18.2053, "step": 8962 }, { "epoch": 0.1638364377501965, "grad_norm": 7.326815693030651, "learning_rate": 9.537704574285644e-06, "loss": 17.7434, "step": 8963 }, { "epoch": 0.16385471694664303, "grad_norm": 8.0552337540858, "learning_rate": 9.53758025224041e-06, "loss": 18.2094, "step": 8964 }, { "epoch": 0.16387299614308956, "grad_norm": 8.021030654479663, "learning_rate": 9.537455914291351e-06, "loss": 17.5572, "step": 8965 }, { "epoch": 0.16389127533953607, "grad_norm": 9.40369593502039, "learning_rate": 9.537331560438903e-06, "loss": 18.1477, "step": 8966 }, { "epoch": 0.1639095545359826, "grad_norm": 7.449777743755273, "learning_rate": 9.537207190683501e-06, "loss": 18.0022, "step": 8967 }, { "epoch": 0.1639278337324291, "grad_norm": 6.755855208741576, "learning_rate": 9.537082805025581e-06, "loss": 17.6478, "step": 8968 }, { "epoch": 0.16394611292887565, "grad_norm": 7.829269949213463, "learning_rate": 9.536958403465581e-06, "loss": 18.2046, "step": 8969 }, { "epoch": 0.16396439212532218, "grad_norm": 7.561169726588245, "learning_rate": 9.536833986003935e-06, "loss": 17.9798, "step": 8970 }, { "epoch": 0.1639826713217687, "grad_norm": 7.979160217539934, "learning_rate": 9.536709552641079e-06, "loss": 18.1425, "step": 8971 }, { "epoch": 0.16400095051821523, "grad_norm": 7.4120818373445925, "learning_rate": 9.53658510337745e-06, "loss": 18.0333, "step": 8972 }, { "epoch": 0.16401922971466174, "grad_norm": 7.110277402834711, "learning_rate": 9.536460638213484e-06, "loss": 17.8487, "step": 8973 }, { "epoch": 0.16403750891110827, "grad_norm": 6.450188051453727, "learning_rate": 9.536336157149617e-06, "loss": 17.5824, "step": 8974 }, { "epoch": 0.16405578810755478, "grad_norm": 6.619651163009498, "learning_rate": 9.536211660186285e-06, "loss": 17.6198, "step": 8975 }, { "epoch": 0.16407406730400131, "grad_norm": 6.628904688632431, "learning_rate": 9.536087147323925e-06, "loss": 17.3563, "step": 8976 }, { "epoch": 0.16409234650044785, "grad_norm": 6.390833912123965, "learning_rate": 9.535962618562973e-06, "loss": 17.6438, "step": 8977 }, { "epoch": 0.16411062569689436, "grad_norm": 8.84388746773094, "learning_rate": 9.535838073903867e-06, "loss": 18.0541, "step": 8978 }, { "epoch": 0.1641289048933409, "grad_norm": 6.898897406262503, "learning_rate": 9.535713513347041e-06, "loss": 17.5198, "step": 8979 }, { "epoch": 0.1641471840897874, "grad_norm": 6.938844238649979, "learning_rate": 9.535588936892934e-06, "loss": 17.473, "step": 8980 }, { "epoch": 0.16416546328623394, "grad_norm": 10.265314021194731, "learning_rate": 9.53546434454198e-06, "loss": 17.8857, "step": 8981 }, { "epoch": 0.16418374248268047, "grad_norm": 5.973072517552841, "learning_rate": 9.535339736294618e-06, "loss": 17.269, "step": 8982 }, { "epoch": 0.16420202167912698, "grad_norm": 6.949757546277983, "learning_rate": 9.535215112151281e-06, "loss": 17.7386, "step": 8983 }, { "epoch": 0.16422030087557352, "grad_norm": 6.326312509864464, "learning_rate": 9.535090472112411e-06, "loss": 17.2417, "step": 8984 }, { "epoch": 0.16423858007202002, "grad_norm": 9.264120030924737, "learning_rate": 9.534965816178443e-06, "loss": 18.3123, "step": 8985 }, { "epoch": 0.16425685926846656, "grad_norm": 6.772672020924337, "learning_rate": 9.534841144349813e-06, "loss": 17.7576, "step": 8986 }, { "epoch": 0.1642751384649131, "grad_norm": 8.329126811228969, "learning_rate": 9.534716456626957e-06, "loss": 18.0118, "step": 8987 }, { "epoch": 0.1642934176613596, "grad_norm": 7.400742624479253, "learning_rate": 9.534591753010314e-06, "loss": 17.9224, "step": 8988 }, { "epoch": 0.16431169685780614, "grad_norm": 6.007836449564718, "learning_rate": 9.53446703350032e-06, "loss": 17.0726, "step": 8989 }, { "epoch": 0.16432997605425265, "grad_norm": 7.562766480730223, "learning_rate": 9.534342298097412e-06, "loss": 17.8489, "step": 8990 }, { "epoch": 0.16434825525069918, "grad_norm": 6.98880304185308, "learning_rate": 9.53421754680203e-06, "loss": 17.4952, "step": 8991 }, { "epoch": 0.1643665344471457, "grad_norm": 9.724134196821625, "learning_rate": 9.534092779614607e-06, "loss": 18.9074, "step": 8992 }, { "epoch": 0.16438481364359223, "grad_norm": 7.251382567170701, "learning_rate": 9.533967996535584e-06, "loss": 17.9577, "step": 8993 }, { "epoch": 0.16440309284003876, "grad_norm": 7.498903365392758, "learning_rate": 9.533843197565396e-06, "loss": 18.052, "step": 8994 }, { "epoch": 0.16442137203648527, "grad_norm": 7.262856847283712, "learning_rate": 9.53371838270448e-06, "loss": 17.8097, "step": 8995 }, { "epoch": 0.1644396512329318, "grad_norm": 7.681262369820483, "learning_rate": 9.533593551953276e-06, "loss": 18.0161, "step": 8996 }, { "epoch": 0.1644579304293783, "grad_norm": 6.880805878430297, "learning_rate": 9.533468705312218e-06, "loss": 17.5347, "step": 8997 }, { "epoch": 0.16447620962582485, "grad_norm": 7.242434003077306, "learning_rate": 9.533343842781746e-06, "loss": 17.8824, "step": 8998 }, { "epoch": 0.16449448882227138, "grad_norm": 9.904514161620357, "learning_rate": 9.533218964362299e-06, "loss": 18.8511, "step": 8999 }, { "epoch": 0.1645127680187179, "grad_norm": 7.483815263177726, "learning_rate": 9.533094070054311e-06, "loss": 17.3156, "step": 9000 }, { "epoch": 0.16453104721516443, "grad_norm": 8.05181023024189, "learning_rate": 9.532969159858223e-06, "loss": 18.2519, "step": 9001 }, { "epoch": 0.16454932641161094, "grad_norm": 7.113555245957429, "learning_rate": 9.53284423377447e-06, "loss": 17.8087, "step": 9002 }, { "epoch": 0.16456760560805747, "grad_norm": 7.012927381244689, "learning_rate": 9.532719291803492e-06, "loss": 17.6893, "step": 9003 }, { "epoch": 0.164585884804504, "grad_norm": 6.958872956133071, "learning_rate": 9.532594333945727e-06, "loss": 17.6789, "step": 9004 }, { "epoch": 0.16460416400095051, "grad_norm": 7.436941121855428, "learning_rate": 9.532469360201612e-06, "loss": 18.0439, "step": 9005 }, { "epoch": 0.16462244319739705, "grad_norm": 8.062205985339064, "learning_rate": 9.532344370571584e-06, "loss": 18.1162, "step": 9006 }, { "epoch": 0.16464072239384356, "grad_norm": 7.177649603497674, "learning_rate": 9.532219365056083e-06, "loss": 17.6347, "step": 9007 }, { "epoch": 0.1646590015902901, "grad_norm": 6.7695673105289025, "learning_rate": 9.532094343655548e-06, "loss": 17.4804, "step": 9008 }, { "epoch": 0.1646772807867366, "grad_norm": 8.496341139684638, "learning_rate": 9.531969306370412e-06, "loss": 18.0347, "step": 9009 }, { "epoch": 0.16469555998318314, "grad_norm": 7.318266467326885, "learning_rate": 9.531844253201119e-06, "loss": 18.001, "step": 9010 }, { "epoch": 0.16471383917962967, "grad_norm": 7.999530387100114, "learning_rate": 9.531719184148106e-06, "loss": 18.4319, "step": 9011 }, { "epoch": 0.16473211837607618, "grad_norm": 7.3789199094497535, "learning_rate": 9.53159409921181e-06, "loss": 17.6925, "step": 9012 }, { "epoch": 0.16475039757252272, "grad_norm": 7.426827848723716, "learning_rate": 9.531468998392669e-06, "loss": 17.5441, "step": 9013 }, { "epoch": 0.16476867676896922, "grad_norm": 7.415358347673079, "learning_rate": 9.531343881691122e-06, "loss": 17.6196, "step": 9014 }, { "epoch": 0.16478695596541576, "grad_norm": 8.714016979434655, "learning_rate": 9.53121874910761e-06, "loss": 18.4228, "step": 9015 }, { "epoch": 0.1648052351618623, "grad_norm": 7.923681657299792, "learning_rate": 9.531093600642567e-06, "loss": 17.8882, "step": 9016 }, { "epoch": 0.1648235143583088, "grad_norm": 7.345767412881944, "learning_rate": 9.530968436296435e-06, "loss": 17.8592, "step": 9017 }, { "epoch": 0.16484179355475534, "grad_norm": 6.56783751919579, "learning_rate": 9.530843256069654e-06, "loss": 17.6087, "step": 9018 }, { "epoch": 0.16486007275120185, "grad_norm": 7.22552776237849, "learning_rate": 9.530718059962658e-06, "loss": 17.7078, "step": 9019 }, { "epoch": 0.16487835194764838, "grad_norm": 5.982942068514054, "learning_rate": 9.53059284797589e-06, "loss": 17.0983, "step": 9020 }, { "epoch": 0.16489663114409492, "grad_norm": 7.0002543746521795, "learning_rate": 9.530467620109786e-06, "loss": 17.7523, "step": 9021 }, { "epoch": 0.16491491034054143, "grad_norm": 8.649614178075742, "learning_rate": 9.530342376364786e-06, "loss": 17.756, "step": 9022 }, { "epoch": 0.16493318953698796, "grad_norm": 7.16265798516129, "learning_rate": 9.530217116741329e-06, "loss": 17.9631, "step": 9023 }, { "epoch": 0.16495146873343447, "grad_norm": 5.529878030601523, "learning_rate": 9.530091841239854e-06, "loss": 17.0858, "step": 9024 }, { "epoch": 0.164969747929881, "grad_norm": 5.93450893703127, "learning_rate": 9.529966549860801e-06, "loss": 17.1779, "step": 9025 }, { "epoch": 0.1649880271263275, "grad_norm": 5.920941626064105, "learning_rate": 9.529841242604609e-06, "loss": 17.3672, "step": 9026 }, { "epoch": 0.16500630632277405, "grad_norm": 7.387747158692213, "learning_rate": 9.529715919471715e-06, "loss": 17.7749, "step": 9027 }, { "epoch": 0.16502458551922058, "grad_norm": 8.98797917732331, "learning_rate": 9.529590580462562e-06, "loss": 18.6328, "step": 9028 }, { "epoch": 0.1650428647156671, "grad_norm": 6.590188461567282, "learning_rate": 9.529465225577586e-06, "loss": 17.5267, "step": 9029 }, { "epoch": 0.16506114391211363, "grad_norm": 8.336657364415158, "learning_rate": 9.529339854817226e-06, "loss": 17.4487, "step": 9030 }, { "epoch": 0.16507942310856014, "grad_norm": 7.971714010329702, "learning_rate": 9.529214468181924e-06, "loss": 18.2908, "step": 9031 }, { "epoch": 0.16509770230500667, "grad_norm": 6.820560068494281, "learning_rate": 9.529089065672118e-06, "loss": 17.4721, "step": 9032 }, { "epoch": 0.1651159815014532, "grad_norm": 7.444814055406021, "learning_rate": 9.528963647288247e-06, "loss": 17.5634, "step": 9033 }, { "epoch": 0.16513426069789972, "grad_norm": 8.52045768338414, "learning_rate": 9.528838213030753e-06, "loss": 18.699, "step": 9034 }, { "epoch": 0.16515253989434625, "grad_norm": 5.784679984227972, "learning_rate": 9.528712762900074e-06, "loss": 17.4384, "step": 9035 }, { "epoch": 0.16517081909079276, "grad_norm": 7.653037685529301, "learning_rate": 9.528587296896649e-06, "loss": 18.2578, "step": 9036 }, { "epoch": 0.1651890982872393, "grad_norm": 6.799617147984463, "learning_rate": 9.528461815020918e-06, "loss": 17.8007, "step": 9037 }, { "epoch": 0.16520737748368583, "grad_norm": 6.968598825158566, "learning_rate": 9.528336317273324e-06, "loss": 17.888, "step": 9038 }, { "epoch": 0.16522565668013234, "grad_norm": 7.165305393866474, "learning_rate": 9.528210803654302e-06, "loss": 17.699, "step": 9039 }, { "epoch": 0.16524393587657887, "grad_norm": 5.371448743197811, "learning_rate": 9.528085274164294e-06, "loss": 17.0871, "step": 9040 }, { "epoch": 0.16526221507302538, "grad_norm": 7.1524163925995685, "learning_rate": 9.52795972880374e-06, "loss": 18.0725, "step": 9041 }, { "epoch": 0.16528049426947192, "grad_norm": 6.932547375366222, "learning_rate": 9.52783416757308e-06, "loss": 17.4842, "step": 9042 }, { "epoch": 0.16529877346591842, "grad_norm": 6.666372747082885, "learning_rate": 9.527708590472755e-06, "loss": 17.5194, "step": 9043 }, { "epoch": 0.16531705266236496, "grad_norm": 6.116693421795748, "learning_rate": 9.527582997503203e-06, "loss": 17.6409, "step": 9044 }, { "epoch": 0.1653353318588115, "grad_norm": 6.27081926121905, "learning_rate": 9.527457388664866e-06, "loss": 17.5528, "step": 9045 }, { "epoch": 0.165353611055258, "grad_norm": 6.842569182429887, "learning_rate": 9.527331763958186e-06, "loss": 17.6139, "step": 9046 }, { "epoch": 0.16537189025170454, "grad_norm": 8.74274288483976, "learning_rate": 9.527206123383597e-06, "loss": 18.2157, "step": 9047 }, { "epoch": 0.16539016944815105, "grad_norm": 6.264777251475679, "learning_rate": 9.527080466941546e-06, "loss": 17.285, "step": 9048 }, { "epoch": 0.16540844864459758, "grad_norm": 7.537827085353265, "learning_rate": 9.52695479463247e-06, "loss": 18.1821, "step": 9049 }, { "epoch": 0.16542672784104412, "grad_norm": 7.565179764824264, "learning_rate": 9.526829106456811e-06, "loss": 17.8749, "step": 9050 }, { "epoch": 0.16544500703749063, "grad_norm": 6.884819067861885, "learning_rate": 9.526703402415007e-06, "loss": 17.7371, "step": 9051 }, { "epoch": 0.16546328623393716, "grad_norm": 6.510990120480692, "learning_rate": 9.526577682507504e-06, "loss": 17.6405, "step": 9052 }, { "epoch": 0.16548156543038367, "grad_norm": 6.513347772149104, "learning_rate": 9.526451946734736e-06, "loss": 17.5503, "step": 9053 }, { "epoch": 0.1654998446268302, "grad_norm": 5.428298287850259, "learning_rate": 9.526326195097146e-06, "loss": 17.1256, "step": 9054 }, { "epoch": 0.16551812382327674, "grad_norm": 6.582988013596661, "learning_rate": 9.526200427595178e-06, "loss": 17.6936, "step": 9055 }, { "epoch": 0.16553640301972325, "grad_norm": 7.370843557480547, "learning_rate": 9.526074644229269e-06, "loss": 17.517, "step": 9056 }, { "epoch": 0.16555468221616979, "grad_norm": 6.923825195792302, "learning_rate": 9.525948844999861e-06, "loss": 17.7574, "step": 9057 }, { "epoch": 0.1655729614126163, "grad_norm": 6.8302860552307445, "learning_rate": 9.525823029907396e-06, "loss": 17.2951, "step": 9058 }, { "epoch": 0.16559124060906283, "grad_norm": 6.958696460058351, "learning_rate": 9.525697198952313e-06, "loss": 17.741, "step": 9059 }, { "epoch": 0.16560951980550934, "grad_norm": 6.996366141266741, "learning_rate": 9.525571352135055e-06, "loss": 17.6815, "step": 9060 }, { "epoch": 0.16562779900195587, "grad_norm": 6.157407783203882, "learning_rate": 9.52544548945606e-06, "loss": 17.4198, "step": 9061 }, { "epoch": 0.1656460781984024, "grad_norm": 6.440894364279506, "learning_rate": 9.525319610915773e-06, "loss": 17.3242, "step": 9062 }, { "epoch": 0.16566435739484892, "grad_norm": 6.854729327882623, "learning_rate": 9.525193716514634e-06, "loss": 17.5983, "step": 9063 }, { "epoch": 0.16568263659129545, "grad_norm": 6.171897296806404, "learning_rate": 9.525067806253082e-06, "loss": 17.2897, "step": 9064 }, { "epoch": 0.16570091578774196, "grad_norm": 8.008405996013842, "learning_rate": 9.524941880131562e-06, "loss": 18.3425, "step": 9065 }, { "epoch": 0.1657191949841885, "grad_norm": 6.725834049524768, "learning_rate": 9.52481593815051e-06, "loss": 17.3255, "step": 9066 }, { "epoch": 0.16573747418063503, "grad_norm": 7.92148911913241, "learning_rate": 9.524689980310375e-06, "loss": 17.8377, "step": 9067 }, { "epoch": 0.16575575337708154, "grad_norm": 7.0223075023185775, "learning_rate": 9.524564006611592e-06, "loss": 17.6414, "step": 9068 }, { "epoch": 0.16577403257352807, "grad_norm": 6.3789484707849295, "learning_rate": 9.524438017054604e-06, "loss": 17.4408, "step": 9069 }, { "epoch": 0.16579231176997458, "grad_norm": 5.897977748498922, "learning_rate": 9.524312011639856e-06, "loss": 17.3339, "step": 9070 }, { "epoch": 0.16581059096642112, "grad_norm": 6.207281410268841, "learning_rate": 9.524185990367785e-06, "loss": 17.2, "step": 9071 }, { "epoch": 0.16582887016286765, "grad_norm": 8.342653154892563, "learning_rate": 9.524059953238836e-06, "loss": 18.3694, "step": 9072 }, { "epoch": 0.16584714935931416, "grad_norm": 6.598668454206093, "learning_rate": 9.523933900253448e-06, "loss": 17.6158, "step": 9073 }, { "epoch": 0.1658654285557607, "grad_norm": 6.59926984065775, "learning_rate": 9.523807831412065e-06, "loss": 17.7051, "step": 9074 }, { "epoch": 0.1658837077522072, "grad_norm": 7.1647343560089505, "learning_rate": 9.523681746715128e-06, "loss": 17.5268, "step": 9075 }, { "epoch": 0.16590198694865374, "grad_norm": 7.627179920498086, "learning_rate": 9.523555646163078e-06, "loss": 17.6897, "step": 9076 }, { "epoch": 0.16592026614510025, "grad_norm": 7.637983839679278, "learning_rate": 9.52342952975636e-06, "loss": 18.0547, "step": 9077 }, { "epoch": 0.16593854534154678, "grad_norm": 8.041272758097744, "learning_rate": 9.523303397495414e-06, "loss": 18.2532, "step": 9078 }, { "epoch": 0.16595682453799332, "grad_norm": 6.30967522088222, "learning_rate": 9.52317724938068e-06, "loss": 17.4144, "step": 9079 }, { "epoch": 0.16597510373443983, "grad_norm": 6.995478330767793, "learning_rate": 9.523051085412603e-06, "loss": 17.6971, "step": 9080 }, { "epoch": 0.16599338293088636, "grad_norm": 6.919525674027049, "learning_rate": 9.522924905591625e-06, "loss": 17.7011, "step": 9081 }, { "epoch": 0.16601166212733287, "grad_norm": 7.863781012799176, "learning_rate": 9.522798709918189e-06, "loss": 18.3145, "step": 9082 }, { "epoch": 0.1660299413237794, "grad_norm": 6.244417281280778, "learning_rate": 9.522672498392734e-06, "loss": 17.4247, "step": 9083 }, { "epoch": 0.16604822052022594, "grad_norm": 7.8538816717185895, "learning_rate": 9.522546271015705e-06, "loss": 17.696, "step": 9084 }, { "epoch": 0.16606649971667245, "grad_norm": 9.300180918835974, "learning_rate": 9.522420027787543e-06, "loss": 18.6505, "step": 9085 }, { "epoch": 0.16608477891311899, "grad_norm": 5.7769481419581155, "learning_rate": 9.522293768708691e-06, "loss": 17.0681, "step": 9086 }, { "epoch": 0.1661030581095655, "grad_norm": 7.199260336614925, "learning_rate": 9.522167493779593e-06, "loss": 17.9693, "step": 9087 }, { "epoch": 0.16612133730601203, "grad_norm": 6.379245550741849, "learning_rate": 9.52204120300069e-06, "loss": 17.7245, "step": 9088 }, { "epoch": 0.16613961650245856, "grad_norm": 8.844373939107124, "learning_rate": 9.521914896372424e-06, "loss": 18.4572, "step": 9089 }, { "epoch": 0.16615789569890507, "grad_norm": 8.247430546903153, "learning_rate": 9.52178857389524e-06, "loss": 18.4524, "step": 9090 }, { "epoch": 0.1661761748953516, "grad_norm": 6.444161082567926, "learning_rate": 9.52166223556958e-06, "loss": 17.3894, "step": 9091 }, { "epoch": 0.16619445409179812, "grad_norm": 5.442652376247098, "learning_rate": 9.521535881395884e-06, "loss": 17.0941, "step": 9092 }, { "epoch": 0.16621273328824465, "grad_norm": 7.279601640261967, "learning_rate": 9.5214095113746e-06, "loss": 17.6853, "step": 9093 }, { "epoch": 0.16623101248469116, "grad_norm": 6.189097401956089, "learning_rate": 9.521283125506166e-06, "loss": 17.417, "step": 9094 }, { "epoch": 0.1662492916811377, "grad_norm": 5.91625718018307, "learning_rate": 9.521156723791028e-06, "loss": 17.0529, "step": 9095 }, { "epoch": 0.16626757087758423, "grad_norm": 6.6431258708244565, "learning_rate": 9.521030306229627e-06, "loss": 17.6213, "step": 9096 }, { "epoch": 0.16628585007403074, "grad_norm": 7.330145933182181, "learning_rate": 9.520903872822407e-06, "loss": 17.8557, "step": 9097 }, { "epoch": 0.16630412927047727, "grad_norm": 6.671444664843496, "learning_rate": 9.520777423569812e-06, "loss": 17.6392, "step": 9098 }, { "epoch": 0.16632240846692378, "grad_norm": 7.772729339617621, "learning_rate": 9.520650958472285e-06, "loss": 17.9338, "step": 9099 }, { "epoch": 0.16634068766337032, "grad_norm": 7.466196889005785, "learning_rate": 9.520524477530266e-06, "loss": 17.9547, "step": 9100 }, { "epoch": 0.16635896685981685, "grad_norm": 7.539791802325543, "learning_rate": 9.520397980744204e-06, "loss": 17.8858, "step": 9101 }, { "epoch": 0.16637724605626336, "grad_norm": 7.129833234767096, "learning_rate": 9.520271468114539e-06, "loss": 17.8684, "step": 9102 }, { "epoch": 0.1663955252527099, "grad_norm": 5.323013439138336, "learning_rate": 9.520144939641713e-06, "loss": 16.938, "step": 9103 }, { "epoch": 0.1664138044491564, "grad_norm": 7.393868613524314, "learning_rate": 9.520018395326171e-06, "loss": 18.0491, "step": 9104 }, { "epoch": 0.16643208364560294, "grad_norm": 7.244360215684982, "learning_rate": 9.519891835168359e-06, "loss": 17.443, "step": 9105 }, { "epoch": 0.16645036284204948, "grad_norm": 6.633329129216576, "learning_rate": 9.519765259168716e-06, "loss": 17.2289, "step": 9106 }, { "epoch": 0.16646864203849598, "grad_norm": 6.955839024230898, "learning_rate": 9.519638667327691e-06, "loss": 17.6643, "step": 9107 }, { "epoch": 0.16648692123494252, "grad_norm": 5.691450733880839, "learning_rate": 9.519512059645723e-06, "loss": 17.0938, "step": 9108 }, { "epoch": 0.16650520043138903, "grad_norm": 7.454345822977432, "learning_rate": 9.519385436123256e-06, "loss": 17.7329, "step": 9109 }, { "epoch": 0.16652347962783556, "grad_norm": 8.266789452889764, "learning_rate": 9.519258796760738e-06, "loss": 17.6905, "step": 9110 }, { "epoch": 0.16654175882428207, "grad_norm": 9.33813301366781, "learning_rate": 9.519132141558607e-06, "loss": 18.6763, "step": 9111 }, { "epoch": 0.1665600380207286, "grad_norm": 8.822979246389119, "learning_rate": 9.519005470517312e-06, "loss": 18.5242, "step": 9112 }, { "epoch": 0.16657831721717514, "grad_norm": 8.916193767795104, "learning_rate": 9.518878783637296e-06, "loss": 18.3267, "step": 9113 }, { "epoch": 0.16659659641362165, "grad_norm": 8.055989505768691, "learning_rate": 9.518752080918999e-06, "loss": 18.1398, "step": 9114 }, { "epoch": 0.16661487561006819, "grad_norm": 12.760266756194506, "learning_rate": 9.51862536236287e-06, "loss": 18.4892, "step": 9115 }, { "epoch": 0.1666331548065147, "grad_norm": 7.3553093464905235, "learning_rate": 9.518498627969351e-06, "loss": 18.0986, "step": 9116 }, { "epoch": 0.16665143400296123, "grad_norm": 7.113153293673835, "learning_rate": 9.518371877738885e-06, "loss": 17.8732, "step": 9117 }, { "epoch": 0.16666971319940777, "grad_norm": 7.311254523474977, "learning_rate": 9.51824511167192e-06, "loss": 17.6422, "step": 9118 }, { "epoch": 0.16668799239585427, "grad_norm": 7.902962118314464, "learning_rate": 9.518118329768897e-06, "loss": 18.2016, "step": 9119 }, { "epoch": 0.1667062715923008, "grad_norm": 6.982253601525164, "learning_rate": 9.51799153203026e-06, "loss": 17.5656, "step": 9120 }, { "epoch": 0.16672455078874732, "grad_norm": 9.64107093255908, "learning_rate": 9.517864718456457e-06, "loss": 18.3941, "step": 9121 }, { "epoch": 0.16674282998519385, "grad_norm": 6.648779777416434, "learning_rate": 9.51773788904793e-06, "loss": 17.7655, "step": 9122 }, { "epoch": 0.1667611091816404, "grad_norm": 7.52096006546244, "learning_rate": 9.517611043805122e-06, "loss": 18.342, "step": 9123 }, { "epoch": 0.1667793883780869, "grad_norm": 6.967318309335177, "learning_rate": 9.517484182728481e-06, "loss": 17.4008, "step": 9124 }, { "epoch": 0.16679766757453343, "grad_norm": 5.507922484346446, "learning_rate": 9.517357305818447e-06, "loss": 16.934, "step": 9125 }, { "epoch": 0.16681594677097994, "grad_norm": 8.284403361115952, "learning_rate": 9.517230413075471e-06, "loss": 18.146, "step": 9126 }, { "epoch": 0.16683422596742647, "grad_norm": 6.162027763574121, "learning_rate": 9.517103504499993e-06, "loss": 17.4649, "step": 9127 }, { "epoch": 0.16685250516387298, "grad_norm": 5.993646032662872, "learning_rate": 9.516976580092459e-06, "loss": 17.2926, "step": 9128 }, { "epoch": 0.16687078436031952, "grad_norm": 7.503613357613627, "learning_rate": 9.516849639853314e-06, "loss": 18.0304, "step": 9129 }, { "epoch": 0.16688906355676605, "grad_norm": 6.890723043094061, "learning_rate": 9.516722683783003e-06, "loss": 18.1006, "step": 9130 }, { "epoch": 0.16690734275321256, "grad_norm": 6.5303244166727525, "learning_rate": 9.516595711881972e-06, "loss": 17.6837, "step": 9131 }, { "epoch": 0.1669256219496591, "grad_norm": 7.579422848183306, "learning_rate": 9.516468724150664e-06, "loss": 18.1218, "step": 9132 }, { "epoch": 0.1669439011461056, "grad_norm": 7.730681670507118, "learning_rate": 9.516341720589525e-06, "loss": 17.8222, "step": 9133 }, { "epoch": 0.16696218034255214, "grad_norm": 5.854139019858464, "learning_rate": 9.516214701199e-06, "loss": 17.2757, "step": 9134 }, { "epoch": 0.16698045953899868, "grad_norm": 9.443290375382677, "learning_rate": 9.516087665979536e-06, "loss": 17.5585, "step": 9135 }, { "epoch": 0.16699873873544518, "grad_norm": 7.466096064656885, "learning_rate": 9.515960614931575e-06, "loss": 18.0145, "step": 9136 }, { "epoch": 0.16701701793189172, "grad_norm": 6.513489335465951, "learning_rate": 9.515833548055563e-06, "loss": 17.6289, "step": 9137 }, { "epoch": 0.16703529712833823, "grad_norm": 7.957763787735906, "learning_rate": 9.51570646535195e-06, "loss": 18.057, "step": 9138 }, { "epoch": 0.16705357632478476, "grad_norm": 7.047812347213954, "learning_rate": 9.515579366821175e-06, "loss": 17.9389, "step": 9139 }, { "epoch": 0.1670718555212313, "grad_norm": 6.718738424242314, "learning_rate": 9.515452252463687e-06, "loss": 17.5463, "step": 9140 }, { "epoch": 0.1670901347176778, "grad_norm": 6.0169625408681835, "learning_rate": 9.51532512227993e-06, "loss": 17.4768, "step": 9141 }, { "epoch": 0.16710841391412434, "grad_norm": 6.4821786910550365, "learning_rate": 9.51519797627035e-06, "loss": 17.5259, "step": 9142 }, { "epoch": 0.16712669311057085, "grad_norm": 6.517838393509723, "learning_rate": 9.515070814435394e-06, "loss": 17.4701, "step": 9143 }, { "epoch": 0.1671449723070174, "grad_norm": 8.17111782001087, "learning_rate": 9.514943636775504e-06, "loss": 18.0557, "step": 9144 }, { "epoch": 0.1671632515034639, "grad_norm": 7.30316783584968, "learning_rate": 9.51481644329113e-06, "loss": 17.4473, "step": 9145 }, { "epoch": 0.16718153069991043, "grad_norm": 7.990399924648765, "learning_rate": 9.514689233982718e-06, "loss": 17.9287, "step": 9146 }, { "epoch": 0.16719980989635697, "grad_norm": 7.193933282375789, "learning_rate": 9.51456200885071e-06, "loss": 17.6412, "step": 9147 }, { "epoch": 0.16721808909280347, "grad_norm": 8.033713818889083, "learning_rate": 9.514434767895555e-06, "loss": 18.1817, "step": 9148 }, { "epoch": 0.16723636828925, "grad_norm": 6.876274301198719, "learning_rate": 9.514307511117697e-06, "loss": 17.4852, "step": 9149 }, { "epoch": 0.16725464748569652, "grad_norm": 9.832202016975332, "learning_rate": 9.514180238517583e-06, "loss": 18.4228, "step": 9150 }, { "epoch": 0.16727292668214305, "grad_norm": 7.205506056626777, "learning_rate": 9.514052950095659e-06, "loss": 17.7711, "step": 9151 }, { "epoch": 0.1672912058785896, "grad_norm": 7.148710541106331, "learning_rate": 9.51392564585237e-06, "loss": 17.6796, "step": 9152 }, { "epoch": 0.1673094850750361, "grad_norm": 7.348555779791997, "learning_rate": 9.513798325788167e-06, "loss": 17.7698, "step": 9153 }, { "epoch": 0.16732776427148263, "grad_norm": 7.677600866506125, "learning_rate": 9.51367098990349e-06, "loss": 17.7246, "step": 9154 }, { "epoch": 0.16734604346792914, "grad_norm": 7.2066128084205285, "learning_rate": 9.513543638198787e-06, "loss": 17.3363, "step": 9155 }, { "epoch": 0.16736432266437568, "grad_norm": 7.187256040340105, "learning_rate": 9.513416270674506e-06, "loss": 17.952, "step": 9156 }, { "epoch": 0.1673826018608222, "grad_norm": 6.73423065779139, "learning_rate": 9.513288887331093e-06, "loss": 17.8486, "step": 9157 }, { "epoch": 0.16740088105726872, "grad_norm": 7.845017050587436, "learning_rate": 9.513161488168993e-06, "loss": 18.1444, "step": 9158 }, { "epoch": 0.16741916025371525, "grad_norm": 7.398952054211818, "learning_rate": 9.513034073188656e-06, "loss": 18.2226, "step": 9159 }, { "epoch": 0.16743743945016176, "grad_norm": 6.692649336390622, "learning_rate": 9.512906642390526e-06, "loss": 17.5083, "step": 9160 }, { "epoch": 0.1674557186466083, "grad_norm": 6.974035709920217, "learning_rate": 9.512779195775048e-06, "loss": 17.8033, "step": 9161 }, { "epoch": 0.1674739978430548, "grad_norm": 6.330347140989411, "learning_rate": 9.512651733342671e-06, "loss": 17.5932, "step": 9162 }, { "epoch": 0.16749227703950134, "grad_norm": 7.61732463302869, "learning_rate": 9.51252425509384e-06, "loss": 17.8264, "step": 9163 }, { "epoch": 0.16751055623594788, "grad_norm": 7.355653977371296, "learning_rate": 9.512396761029004e-06, "loss": 17.8449, "step": 9164 }, { "epoch": 0.16752883543239439, "grad_norm": 6.141978303978035, "learning_rate": 9.51226925114861e-06, "loss": 17.3168, "step": 9165 }, { "epoch": 0.16754711462884092, "grad_norm": 6.877778001090658, "learning_rate": 9.512141725453103e-06, "loss": 17.7299, "step": 9166 }, { "epoch": 0.16756539382528743, "grad_norm": 6.62830012161467, "learning_rate": 9.512014183942932e-06, "loss": 17.5122, "step": 9167 }, { "epoch": 0.16758367302173396, "grad_norm": 7.861931039407114, "learning_rate": 9.51188662661854e-06, "loss": 18.0033, "step": 9168 }, { "epoch": 0.1676019522181805, "grad_norm": 6.158488854561656, "learning_rate": 9.51175905348038e-06, "loss": 17.3119, "step": 9169 }, { "epoch": 0.167620231414627, "grad_norm": 7.31101099269187, "learning_rate": 9.511631464528894e-06, "loss": 17.6251, "step": 9170 }, { "epoch": 0.16763851061107354, "grad_norm": 6.069969422578263, "learning_rate": 9.511503859764532e-06, "loss": 17.281, "step": 9171 }, { "epoch": 0.16765678980752005, "grad_norm": 7.98784248537527, "learning_rate": 9.511376239187741e-06, "loss": 18.3653, "step": 9172 }, { "epoch": 0.1676750690039666, "grad_norm": 7.4389387664164515, "learning_rate": 9.511248602798967e-06, "loss": 17.8475, "step": 9173 }, { "epoch": 0.16769334820041312, "grad_norm": 8.398094097487682, "learning_rate": 9.511120950598659e-06, "loss": 17.9461, "step": 9174 }, { "epoch": 0.16771162739685963, "grad_norm": 5.612258948719771, "learning_rate": 9.510993282587262e-06, "loss": 17.1776, "step": 9175 }, { "epoch": 0.16772990659330617, "grad_norm": 7.1766094368171505, "learning_rate": 9.510865598765227e-06, "loss": 17.7494, "step": 9176 }, { "epoch": 0.16774818578975267, "grad_norm": 7.2923563662027195, "learning_rate": 9.510737899132998e-06, "loss": 18.1259, "step": 9177 }, { "epoch": 0.1677664649861992, "grad_norm": 6.7949270375861195, "learning_rate": 9.510610183691023e-06, "loss": 17.5615, "step": 9178 }, { "epoch": 0.16778474418264572, "grad_norm": 8.461201564995129, "learning_rate": 9.510482452439753e-06, "loss": 18.3026, "step": 9179 }, { "epoch": 0.16780302337909225, "grad_norm": 6.799459045467971, "learning_rate": 9.510354705379632e-06, "loss": 17.7901, "step": 9180 }, { "epoch": 0.1678213025755388, "grad_norm": 8.015234311899766, "learning_rate": 9.51022694251111e-06, "loss": 17.9572, "step": 9181 }, { "epoch": 0.1678395817719853, "grad_norm": 6.784102337008483, "learning_rate": 9.510099163834633e-06, "loss": 17.7439, "step": 9182 }, { "epoch": 0.16785786096843183, "grad_norm": 7.6356079692659655, "learning_rate": 9.50997136935065e-06, "loss": 18.1289, "step": 9183 }, { "epoch": 0.16787614016487834, "grad_norm": 6.4649360917136995, "learning_rate": 9.50984355905961e-06, "loss": 17.3035, "step": 9184 }, { "epoch": 0.16789441936132488, "grad_norm": 5.971468991654149, "learning_rate": 9.509715732961959e-06, "loss": 17.2778, "step": 9185 }, { "epoch": 0.1679126985577714, "grad_norm": 7.19222144592288, "learning_rate": 9.509587891058144e-06, "loss": 17.628, "step": 9186 }, { "epoch": 0.16793097775421792, "grad_norm": 5.6168211935698436, "learning_rate": 9.509460033348617e-06, "loss": 17.0353, "step": 9187 }, { "epoch": 0.16794925695066446, "grad_norm": 7.143353577569963, "learning_rate": 9.509332159833824e-06, "loss": 17.691, "step": 9188 }, { "epoch": 0.16796753614711096, "grad_norm": 7.99527562581922, "learning_rate": 9.509204270514212e-06, "loss": 18.5328, "step": 9189 }, { "epoch": 0.1679858153435575, "grad_norm": 8.806796971586664, "learning_rate": 9.509076365390231e-06, "loss": 18.1942, "step": 9190 }, { "epoch": 0.16800409454000403, "grad_norm": 6.80142875338237, "learning_rate": 9.508948444462327e-06, "loss": 17.8174, "step": 9191 }, { "epoch": 0.16802237373645054, "grad_norm": 8.82466589624803, "learning_rate": 9.508820507730952e-06, "loss": 18.6217, "step": 9192 }, { "epoch": 0.16804065293289708, "grad_norm": 7.190350624726181, "learning_rate": 9.50869255519655e-06, "loss": 17.8272, "step": 9193 }, { "epoch": 0.16805893212934359, "grad_norm": 7.015287144182023, "learning_rate": 9.508564586859575e-06, "loss": 17.9058, "step": 9194 }, { "epoch": 0.16807721132579012, "grad_norm": 5.8777521897708525, "learning_rate": 9.50843660272047e-06, "loss": 17.3103, "step": 9195 }, { "epoch": 0.16809549052223663, "grad_norm": 7.262802111291724, "learning_rate": 9.508308602779686e-06, "loss": 18.0411, "step": 9196 }, { "epoch": 0.16811376971868316, "grad_norm": 7.969769699031251, "learning_rate": 9.508180587037673e-06, "loss": 17.8796, "step": 9197 }, { "epoch": 0.1681320489151297, "grad_norm": 8.27773983726501, "learning_rate": 9.508052555494878e-06, "loss": 18.2145, "step": 9198 }, { "epoch": 0.1681503281115762, "grad_norm": 7.160206735734677, "learning_rate": 9.50792450815175e-06, "loss": 17.6246, "step": 9199 }, { "epoch": 0.16816860730802274, "grad_norm": 8.286567525870572, "learning_rate": 9.507796445008737e-06, "loss": 17.7956, "step": 9200 }, { "epoch": 0.16818688650446925, "grad_norm": 7.4618560161589444, "learning_rate": 9.507668366066289e-06, "loss": 17.9003, "step": 9201 }, { "epoch": 0.1682051657009158, "grad_norm": 6.215729319601452, "learning_rate": 9.507540271324852e-06, "loss": 17.2985, "step": 9202 }, { "epoch": 0.16822344489736232, "grad_norm": 8.174184185896188, "learning_rate": 9.507412160784881e-06, "loss": 18.2771, "step": 9203 }, { "epoch": 0.16824172409380883, "grad_norm": 5.97025787633085, "learning_rate": 9.50728403444682e-06, "loss": 17.3012, "step": 9204 }, { "epoch": 0.16826000329025537, "grad_norm": 8.322850448430295, "learning_rate": 9.50715589231112e-06, "loss": 17.9653, "step": 9205 }, { "epoch": 0.16827828248670187, "grad_norm": 6.128258394920188, "learning_rate": 9.50702773437823e-06, "loss": 17.4, "step": 9206 }, { "epoch": 0.1682965616831484, "grad_norm": 5.603444969341588, "learning_rate": 9.5068995606486e-06, "loss": 17.2268, "step": 9207 }, { "epoch": 0.16831484087959495, "grad_norm": 6.814603802889895, "learning_rate": 9.506771371122676e-06, "loss": 17.7093, "step": 9208 }, { "epoch": 0.16833312007604145, "grad_norm": 7.321675236998762, "learning_rate": 9.50664316580091e-06, "loss": 17.8145, "step": 9209 }, { "epoch": 0.168351399272488, "grad_norm": 6.474422885000199, "learning_rate": 9.506514944683752e-06, "loss": 17.464, "step": 9210 }, { "epoch": 0.1683696784689345, "grad_norm": 7.30339512472791, "learning_rate": 9.506386707771648e-06, "loss": 17.8909, "step": 9211 }, { "epoch": 0.16838795766538103, "grad_norm": 7.456931045903403, "learning_rate": 9.50625845506505e-06, "loss": 17.8489, "step": 9212 }, { "epoch": 0.16840623686182754, "grad_norm": 7.463795746898944, "learning_rate": 9.506130186564408e-06, "loss": 18.1574, "step": 9213 }, { "epoch": 0.16842451605827408, "grad_norm": 8.117000953008189, "learning_rate": 9.50600190227017e-06, "loss": 18.2337, "step": 9214 }, { "epoch": 0.1684427952547206, "grad_norm": 7.830745084486118, "learning_rate": 9.505873602182788e-06, "loss": 18.0402, "step": 9215 }, { "epoch": 0.16846107445116712, "grad_norm": 9.237325081003904, "learning_rate": 9.505745286302707e-06, "loss": 18.5084, "step": 9216 }, { "epoch": 0.16847935364761366, "grad_norm": 7.6594654940174856, "learning_rate": 9.505616954630383e-06, "loss": 18.4119, "step": 9217 }, { "epoch": 0.16849763284406016, "grad_norm": 6.192285300112505, "learning_rate": 9.505488607166262e-06, "loss": 17.3778, "step": 9218 }, { "epoch": 0.1685159120405067, "grad_norm": 9.209633641285492, "learning_rate": 9.505360243910792e-06, "loss": 18.6063, "step": 9219 }, { "epoch": 0.16853419123695323, "grad_norm": 9.379714619614132, "learning_rate": 9.505231864864429e-06, "loss": 18.6675, "step": 9220 }, { "epoch": 0.16855247043339974, "grad_norm": 8.075719850020935, "learning_rate": 9.505103470027617e-06, "loss": 18.3677, "step": 9221 }, { "epoch": 0.16857074962984628, "grad_norm": 8.194403618110798, "learning_rate": 9.504975059400807e-06, "loss": 18.0993, "step": 9222 }, { "epoch": 0.16858902882629279, "grad_norm": 6.596985374575552, "learning_rate": 9.504846632984452e-06, "loss": 17.5941, "step": 9223 }, { "epoch": 0.16860730802273932, "grad_norm": 6.485470657566503, "learning_rate": 9.504718190779001e-06, "loss": 17.6721, "step": 9224 }, { "epoch": 0.16862558721918586, "grad_norm": 6.6423030234339295, "learning_rate": 9.504589732784903e-06, "loss": 17.5654, "step": 9225 }, { "epoch": 0.16864386641563237, "grad_norm": 6.915606847521376, "learning_rate": 9.50446125900261e-06, "loss": 17.7335, "step": 9226 }, { "epoch": 0.1686621456120789, "grad_norm": 5.773394547521735, "learning_rate": 9.50433276943257e-06, "loss": 17.2237, "step": 9227 }, { "epoch": 0.1686804248085254, "grad_norm": 7.11829780011999, "learning_rate": 9.504204264075234e-06, "loss": 17.641, "step": 9228 }, { "epoch": 0.16869870400497194, "grad_norm": 6.4137613968029505, "learning_rate": 9.504075742931052e-06, "loss": 17.5393, "step": 9229 }, { "epoch": 0.16871698320141845, "grad_norm": 6.7655899129952255, "learning_rate": 9.503947206000477e-06, "loss": 17.5254, "step": 9230 }, { "epoch": 0.168735262397865, "grad_norm": 5.51686495662697, "learning_rate": 9.503818653283959e-06, "loss": 17.1054, "step": 9231 }, { "epoch": 0.16875354159431152, "grad_norm": 7.395907813258995, "learning_rate": 9.503690084781945e-06, "loss": 18.1155, "step": 9232 }, { "epoch": 0.16877182079075803, "grad_norm": 6.682609950792527, "learning_rate": 9.503561500494889e-06, "loss": 17.6452, "step": 9233 }, { "epoch": 0.16879009998720457, "grad_norm": 6.556590064553701, "learning_rate": 9.503432900423242e-06, "loss": 17.8319, "step": 9234 }, { "epoch": 0.16880837918365107, "grad_norm": 6.682219631635365, "learning_rate": 9.503304284567451e-06, "loss": 17.5929, "step": 9235 }, { "epoch": 0.1688266583800976, "grad_norm": 8.022964534443789, "learning_rate": 9.503175652927972e-06, "loss": 17.993, "step": 9236 }, { "epoch": 0.16884493757654415, "grad_norm": 7.680467349943672, "learning_rate": 9.50304700550525e-06, "loss": 18.2086, "step": 9237 }, { "epoch": 0.16886321677299065, "grad_norm": 7.797938679905344, "learning_rate": 9.502918342299742e-06, "loss": 17.9965, "step": 9238 }, { "epoch": 0.1688814959694372, "grad_norm": 8.175512592833154, "learning_rate": 9.502789663311896e-06, "loss": 17.7192, "step": 9239 }, { "epoch": 0.1688997751658837, "grad_norm": 7.807439592723891, "learning_rate": 9.50266096854216e-06, "loss": 18.2918, "step": 9240 }, { "epoch": 0.16891805436233023, "grad_norm": 7.1402803356561355, "learning_rate": 9.502532257990991e-06, "loss": 17.7497, "step": 9241 }, { "epoch": 0.16893633355877677, "grad_norm": 7.1479553245385405, "learning_rate": 9.502403531658834e-06, "loss": 17.7492, "step": 9242 }, { "epoch": 0.16895461275522328, "grad_norm": 7.6740459373006615, "learning_rate": 9.502274789546147e-06, "loss": 18.0036, "step": 9243 }, { "epoch": 0.1689728919516698, "grad_norm": 7.04948573680895, "learning_rate": 9.502146031653375e-06, "loss": 17.817, "step": 9244 }, { "epoch": 0.16899117114811632, "grad_norm": 7.472060819134237, "learning_rate": 9.502017257980972e-06, "loss": 17.7266, "step": 9245 }, { "epoch": 0.16900945034456286, "grad_norm": 11.345342492889468, "learning_rate": 9.50188846852939e-06, "loss": 18.4252, "step": 9246 }, { "epoch": 0.16902772954100936, "grad_norm": 6.324292922016057, "learning_rate": 9.501759663299079e-06, "loss": 17.4231, "step": 9247 }, { "epoch": 0.1690460087374559, "grad_norm": 7.947575167075575, "learning_rate": 9.501630842290492e-06, "loss": 18.4636, "step": 9248 }, { "epoch": 0.16906428793390244, "grad_norm": 7.064056122783764, "learning_rate": 9.501502005504079e-06, "loss": 17.7031, "step": 9249 }, { "epoch": 0.16908256713034894, "grad_norm": 6.607574761603722, "learning_rate": 9.501373152940292e-06, "loss": 17.5506, "step": 9250 }, { "epoch": 0.16910084632679548, "grad_norm": 7.47745336555634, "learning_rate": 9.501244284599582e-06, "loss": 18.0999, "step": 9251 }, { "epoch": 0.169119125523242, "grad_norm": 6.442336287804173, "learning_rate": 9.501115400482401e-06, "loss": 17.6483, "step": 9252 }, { "epoch": 0.16913740471968852, "grad_norm": 6.689660504446449, "learning_rate": 9.500986500589204e-06, "loss": 17.7348, "step": 9253 }, { "epoch": 0.16915568391613506, "grad_norm": 7.0556173662591055, "learning_rate": 9.500857584920438e-06, "loss": 17.8223, "step": 9254 }, { "epoch": 0.16917396311258157, "grad_norm": 6.435332083102732, "learning_rate": 9.500728653476556e-06, "loss": 17.435, "step": 9255 }, { "epoch": 0.1691922423090281, "grad_norm": 8.178954829322633, "learning_rate": 9.500599706258012e-06, "loss": 18.1133, "step": 9256 }, { "epoch": 0.1692105215054746, "grad_norm": 7.486000079432466, "learning_rate": 9.500470743265256e-06, "loss": 17.9521, "step": 9257 }, { "epoch": 0.16922880070192114, "grad_norm": 7.414235605822304, "learning_rate": 9.50034176449874e-06, "loss": 17.9106, "step": 9258 }, { "epoch": 0.16924707989836768, "grad_norm": 7.29957041738579, "learning_rate": 9.500212769958916e-06, "loss": 17.9123, "step": 9259 }, { "epoch": 0.1692653590948142, "grad_norm": 6.419324021770509, "learning_rate": 9.50008375964624e-06, "loss": 17.5822, "step": 9260 }, { "epoch": 0.16928363829126072, "grad_norm": 7.183503307851751, "learning_rate": 9.499954733561159e-06, "loss": 17.827, "step": 9261 }, { "epoch": 0.16930191748770723, "grad_norm": 5.9867549399642535, "learning_rate": 9.499825691704125e-06, "loss": 17.3328, "step": 9262 }, { "epoch": 0.16932019668415377, "grad_norm": 6.538978287646619, "learning_rate": 9.499696634075595e-06, "loss": 17.1429, "step": 9263 }, { "epoch": 0.16933847588060028, "grad_norm": 6.492351933050396, "learning_rate": 9.499567560676018e-06, "loss": 17.5489, "step": 9264 }, { "epoch": 0.1693567550770468, "grad_norm": 7.239262810517472, "learning_rate": 9.499438471505848e-06, "loss": 17.7833, "step": 9265 }, { "epoch": 0.16937503427349335, "grad_norm": 6.616435648759427, "learning_rate": 9.499309366565536e-06, "loss": 17.4924, "step": 9266 }, { "epoch": 0.16939331346993985, "grad_norm": 6.554446689823281, "learning_rate": 9.499180245855535e-06, "loss": 17.3935, "step": 9267 }, { "epoch": 0.1694115926663864, "grad_norm": 6.5185668445542655, "learning_rate": 9.499051109376298e-06, "loss": 17.4669, "step": 9268 }, { "epoch": 0.1694298718628329, "grad_norm": 7.007126029712472, "learning_rate": 9.498921957128278e-06, "loss": 17.9778, "step": 9269 }, { "epoch": 0.16944815105927943, "grad_norm": 8.031631202231285, "learning_rate": 9.498792789111924e-06, "loss": 18.2722, "step": 9270 }, { "epoch": 0.16946643025572597, "grad_norm": 6.224877837271482, "learning_rate": 9.498663605327694e-06, "loss": 17.4046, "step": 9271 }, { "epoch": 0.16948470945217248, "grad_norm": 7.110763781889539, "learning_rate": 9.498534405776038e-06, "loss": 17.8919, "step": 9272 }, { "epoch": 0.169502988648619, "grad_norm": 7.6365904652147885, "learning_rate": 9.498405190457409e-06, "loss": 17.9798, "step": 9273 }, { "epoch": 0.16952126784506552, "grad_norm": 8.219589460169788, "learning_rate": 9.49827595937226e-06, "loss": 18.0456, "step": 9274 }, { "epoch": 0.16953954704151206, "grad_norm": 7.332579281884612, "learning_rate": 9.498146712521046e-06, "loss": 17.9169, "step": 9275 }, { "epoch": 0.1695578262379586, "grad_norm": 6.298096144118503, "learning_rate": 9.498017449904216e-06, "loss": 17.4031, "step": 9276 }, { "epoch": 0.1695761054344051, "grad_norm": 7.552642894361333, "learning_rate": 9.497888171522227e-06, "loss": 17.9247, "step": 9277 }, { "epoch": 0.16959438463085164, "grad_norm": 6.453234743792428, "learning_rate": 9.497758877375528e-06, "loss": 17.3823, "step": 9278 }, { "epoch": 0.16961266382729814, "grad_norm": 8.067008263297412, "learning_rate": 9.497629567464576e-06, "loss": 17.763, "step": 9279 }, { "epoch": 0.16963094302374468, "grad_norm": 8.0629395551368, "learning_rate": 9.497500241789822e-06, "loss": 17.9484, "step": 9280 }, { "epoch": 0.1696492222201912, "grad_norm": 7.35078081421746, "learning_rate": 9.49737090035172e-06, "loss": 17.988, "step": 9281 }, { "epoch": 0.16966750141663772, "grad_norm": 7.718604409984683, "learning_rate": 9.497241543150724e-06, "loss": 17.883, "step": 9282 }, { "epoch": 0.16968578061308426, "grad_norm": 6.59497357682199, "learning_rate": 9.497112170187287e-06, "loss": 17.394, "step": 9283 }, { "epoch": 0.16970405980953077, "grad_norm": 7.051795670369403, "learning_rate": 9.49698278146186e-06, "loss": 17.551, "step": 9284 }, { "epoch": 0.1697223390059773, "grad_norm": 7.299070725776896, "learning_rate": 9.4968533769749e-06, "loss": 17.7815, "step": 9285 }, { "epoch": 0.1697406182024238, "grad_norm": 7.968673991617567, "learning_rate": 9.49672395672686e-06, "loss": 17.9397, "step": 9286 }, { "epoch": 0.16975889739887035, "grad_norm": 7.478284411752328, "learning_rate": 9.49659452071819e-06, "loss": 18.2165, "step": 9287 }, { "epoch": 0.16977717659531688, "grad_norm": 6.5996511026123, "learning_rate": 9.496465068949348e-06, "loss": 17.4892, "step": 9288 }, { "epoch": 0.1697954557917634, "grad_norm": 6.682420819376153, "learning_rate": 9.496335601420786e-06, "loss": 17.5018, "step": 9289 }, { "epoch": 0.16981373498820992, "grad_norm": 5.949975487642374, "learning_rate": 9.496206118132958e-06, "loss": 17.3644, "step": 9290 }, { "epoch": 0.16983201418465643, "grad_norm": 6.953868735342993, "learning_rate": 9.496076619086318e-06, "loss": 17.7816, "step": 9291 }, { "epoch": 0.16985029338110297, "grad_norm": 7.628905059730847, "learning_rate": 9.495947104281318e-06, "loss": 17.9063, "step": 9292 }, { "epoch": 0.1698685725775495, "grad_norm": 6.472233724623038, "learning_rate": 9.495817573718415e-06, "loss": 17.8451, "step": 9293 }, { "epoch": 0.169886851773996, "grad_norm": 6.881108318120469, "learning_rate": 9.495688027398062e-06, "loss": 17.6711, "step": 9294 }, { "epoch": 0.16990513097044255, "grad_norm": 7.015050799593046, "learning_rate": 9.495558465320712e-06, "loss": 17.7221, "step": 9295 }, { "epoch": 0.16992341016688906, "grad_norm": 6.518346540265528, "learning_rate": 9.49542888748682e-06, "loss": 17.5115, "step": 9296 }, { "epoch": 0.1699416893633356, "grad_norm": 6.619956820599698, "learning_rate": 9.495299293896839e-06, "loss": 17.5084, "step": 9297 }, { "epoch": 0.1699599685597821, "grad_norm": 7.572798054728141, "learning_rate": 9.495169684551224e-06, "loss": 17.9554, "step": 9298 }, { "epoch": 0.16997824775622863, "grad_norm": 6.131299772610638, "learning_rate": 9.495040059450431e-06, "loss": 17.3223, "step": 9299 }, { "epoch": 0.16999652695267517, "grad_norm": 8.178945579484868, "learning_rate": 9.494910418594911e-06, "loss": 17.4849, "step": 9300 }, { "epoch": 0.17001480614912168, "grad_norm": 7.652603534647454, "learning_rate": 9.494780761985121e-06, "loss": 17.8424, "step": 9301 }, { "epoch": 0.1700330853455682, "grad_norm": 5.783751209083505, "learning_rate": 9.494651089621514e-06, "loss": 17.2083, "step": 9302 }, { "epoch": 0.17005136454201472, "grad_norm": 7.664250666867046, "learning_rate": 9.494521401504543e-06, "loss": 17.8247, "step": 9303 }, { "epoch": 0.17006964373846126, "grad_norm": 5.82619101418688, "learning_rate": 9.494391697634669e-06, "loss": 17.4791, "step": 9304 }, { "epoch": 0.1700879229349078, "grad_norm": 7.838781534300981, "learning_rate": 9.494261978012339e-06, "loss": 18.1943, "step": 9305 }, { "epoch": 0.1701062021313543, "grad_norm": 5.987071466722858, "learning_rate": 9.49413224263801e-06, "loss": 17.2892, "step": 9306 }, { "epoch": 0.17012448132780084, "grad_norm": 8.175921098814772, "learning_rate": 9.49400249151214e-06, "loss": 18.1773, "step": 9307 }, { "epoch": 0.17014276052424734, "grad_norm": 8.073834735977083, "learning_rate": 9.493872724635181e-06, "loss": 18.4408, "step": 9308 }, { "epoch": 0.17016103972069388, "grad_norm": 7.009521229929869, "learning_rate": 9.493742942007586e-06, "loss": 17.8554, "step": 9309 }, { "epoch": 0.17017931891714042, "grad_norm": 7.246449100658442, "learning_rate": 9.493613143629812e-06, "loss": 17.4506, "step": 9310 }, { "epoch": 0.17019759811358692, "grad_norm": 6.370295476618179, "learning_rate": 9.493483329502315e-06, "loss": 17.4496, "step": 9311 }, { "epoch": 0.17021587731003346, "grad_norm": 6.840840264164594, "learning_rate": 9.49335349962555e-06, "loss": 17.5516, "step": 9312 }, { "epoch": 0.17023415650647997, "grad_norm": 6.1046331762652155, "learning_rate": 9.493223653999968e-06, "loss": 17.2566, "step": 9313 }, { "epoch": 0.1702524357029265, "grad_norm": 7.016368323732165, "learning_rate": 9.493093792626029e-06, "loss": 18.0346, "step": 9314 }, { "epoch": 0.170270714899373, "grad_norm": 6.76175892001121, "learning_rate": 9.492963915504188e-06, "loss": 17.7956, "step": 9315 }, { "epoch": 0.17028899409581955, "grad_norm": 6.531249376786396, "learning_rate": 9.492834022634895e-06, "loss": 17.4568, "step": 9316 }, { "epoch": 0.17030727329226608, "grad_norm": 7.5987658416392225, "learning_rate": 9.49270411401861e-06, "loss": 17.7604, "step": 9317 }, { "epoch": 0.1703255524887126, "grad_norm": 7.431519081490518, "learning_rate": 9.492574189655786e-06, "loss": 17.8669, "step": 9318 }, { "epoch": 0.17034383168515912, "grad_norm": 7.958235479289281, "learning_rate": 9.49244424954688e-06, "loss": 18.2507, "step": 9319 }, { "epoch": 0.17036211088160563, "grad_norm": 6.986082779483737, "learning_rate": 9.492314293692348e-06, "loss": 17.9994, "step": 9320 }, { "epoch": 0.17038039007805217, "grad_norm": 8.672267116506786, "learning_rate": 9.492184322092642e-06, "loss": 18.2985, "step": 9321 }, { "epoch": 0.1703986692744987, "grad_norm": 8.225867360448504, "learning_rate": 9.492054334748221e-06, "loss": 18.4201, "step": 9322 }, { "epoch": 0.1704169484709452, "grad_norm": 7.556418871422229, "learning_rate": 9.491924331659539e-06, "loss": 18.1668, "step": 9323 }, { "epoch": 0.17043522766739175, "grad_norm": 7.06440955032604, "learning_rate": 9.491794312827051e-06, "loss": 17.9922, "step": 9324 }, { "epoch": 0.17045350686383826, "grad_norm": 6.717742517738556, "learning_rate": 9.491664278251215e-06, "loss": 17.4833, "step": 9325 }, { "epoch": 0.1704717860602848, "grad_norm": 12.546466403581084, "learning_rate": 9.491534227932487e-06, "loss": 18.7726, "step": 9326 }, { "epoch": 0.17049006525673133, "grad_norm": 8.062593558854038, "learning_rate": 9.49140416187132e-06, "loss": 18.2224, "step": 9327 }, { "epoch": 0.17050834445317783, "grad_norm": 7.0581222055222685, "learning_rate": 9.49127408006817e-06, "loss": 17.5139, "step": 9328 }, { "epoch": 0.17052662364962437, "grad_norm": 7.099344618501691, "learning_rate": 9.491143982523494e-06, "loss": 17.9362, "step": 9329 }, { "epoch": 0.17054490284607088, "grad_norm": 7.402714202305478, "learning_rate": 9.49101386923775e-06, "loss": 18.0988, "step": 9330 }, { "epoch": 0.17056318204251741, "grad_norm": 7.7416532297368645, "learning_rate": 9.490883740211392e-06, "loss": 18.1308, "step": 9331 }, { "epoch": 0.17058146123896392, "grad_norm": 5.993456403412218, "learning_rate": 9.490753595444875e-06, "loss": 17.3263, "step": 9332 }, { "epoch": 0.17059974043541046, "grad_norm": 7.382773762165547, "learning_rate": 9.490623434938655e-06, "loss": 18.0158, "step": 9333 }, { "epoch": 0.170618019631857, "grad_norm": 6.90202197645296, "learning_rate": 9.490493258693192e-06, "loss": 17.852, "step": 9334 }, { "epoch": 0.1706362988283035, "grad_norm": 6.578185739755169, "learning_rate": 9.490363066708939e-06, "loss": 17.4495, "step": 9335 }, { "epoch": 0.17065457802475004, "grad_norm": 5.573591882246648, "learning_rate": 9.490232858986353e-06, "loss": 16.9851, "step": 9336 }, { "epoch": 0.17067285722119654, "grad_norm": 5.830884237617553, "learning_rate": 9.490102635525891e-06, "loss": 17.0644, "step": 9337 }, { "epoch": 0.17069113641764308, "grad_norm": 7.101055498888201, "learning_rate": 9.489972396328008e-06, "loss": 17.8703, "step": 9338 }, { "epoch": 0.17070941561408962, "grad_norm": 6.8893580123268325, "learning_rate": 9.489842141393162e-06, "loss": 17.3605, "step": 9339 }, { "epoch": 0.17072769481053612, "grad_norm": 7.00415508406558, "learning_rate": 9.489711870721808e-06, "loss": 17.6772, "step": 9340 }, { "epoch": 0.17074597400698266, "grad_norm": 6.687536240515922, "learning_rate": 9.489581584314404e-06, "loss": 17.6577, "step": 9341 }, { "epoch": 0.17076425320342917, "grad_norm": 6.716421562949809, "learning_rate": 9.489451282171407e-06, "loss": 17.8492, "step": 9342 }, { "epoch": 0.1707825323998757, "grad_norm": 7.025368007532384, "learning_rate": 9.489320964293272e-06, "loss": 17.8394, "step": 9343 }, { "epoch": 0.17080081159632224, "grad_norm": 8.1347781081007, "learning_rate": 9.489190630680456e-06, "loss": 18.1092, "step": 9344 }, { "epoch": 0.17081909079276875, "grad_norm": 8.161686929233428, "learning_rate": 9.489060281333417e-06, "loss": 18.0617, "step": 9345 }, { "epoch": 0.17083736998921528, "grad_norm": 7.743324549428767, "learning_rate": 9.488929916252611e-06, "loss": 18.2099, "step": 9346 }, { "epoch": 0.1708556491856618, "grad_norm": 7.457913709310798, "learning_rate": 9.488799535438496e-06, "loss": 17.9657, "step": 9347 }, { "epoch": 0.17087392838210833, "grad_norm": 6.4565171819898275, "learning_rate": 9.48866913889153e-06, "loss": 17.4339, "step": 9348 }, { "epoch": 0.17089220757855483, "grad_norm": 5.7198681828309175, "learning_rate": 9.488538726612165e-06, "loss": 17.1374, "step": 9349 }, { "epoch": 0.17091048677500137, "grad_norm": 5.829504990701503, "learning_rate": 9.488408298600861e-06, "loss": 17.1341, "step": 9350 }, { "epoch": 0.1709287659714479, "grad_norm": 8.547314581987964, "learning_rate": 9.488277854858077e-06, "loss": 18.4687, "step": 9351 }, { "epoch": 0.1709470451678944, "grad_norm": 6.427092565488006, "learning_rate": 9.488147395384267e-06, "loss": 17.4307, "step": 9352 }, { "epoch": 0.17096532436434095, "grad_norm": 5.945035141440985, "learning_rate": 9.488016920179892e-06, "loss": 17.3094, "step": 9353 }, { "epoch": 0.17098360356078746, "grad_norm": 5.212870788799658, "learning_rate": 9.487886429245406e-06, "loss": 16.9609, "step": 9354 }, { "epoch": 0.171001882757234, "grad_norm": 7.182918513019496, "learning_rate": 9.487755922581267e-06, "loss": 17.9397, "step": 9355 }, { "epoch": 0.17102016195368053, "grad_norm": 8.180485431431427, "learning_rate": 9.487625400187935e-06, "loss": 18.2699, "step": 9356 }, { "epoch": 0.17103844115012704, "grad_norm": 6.908136535638802, "learning_rate": 9.487494862065863e-06, "loss": 17.6333, "step": 9357 }, { "epoch": 0.17105672034657357, "grad_norm": 6.582988597315935, "learning_rate": 9.487364308215513e-06, "loss": 17.5628, "step": 9358 }, { "epoch": 0.17107499954302008, "grad_norm": 7.458926835537541, "learning_rate": 9.487233738637338e-06, "loss": 17.9193, "step": 9359 }, { "epoch": 0.17109327873946661, "grad_norm": 7.8258914490505065, "learning_rate": 9.487103153331799e-06, "loss": 17.9659, "step": 9360 }, { "epoch": 0.17111155793591315, "grad_norm": 7.791564347644736, "learning_rate": 9.486972552299354e-06, "loss": 18.1149, "step": 9361 }, { "epoch": 0.17112983713235966, "grad_norm": 7.872110184686366, "learning_rate": 9.486841935540458e-06, "loss": 17.9103, "step": 9362 }, { "epoch": 0.1711481163288062, "grad_norm": 7.707540377198523, "learning_rate": 9.486711303055571e-06, "loss": 18.2933, "step": 9363 }, { "epoch": 0.1711663955252527, "grad_norm": 7.144271057657575, "learning_rate": 9.486580654845151e-06, "loss": 17.9076, "step": 9364 }, { "epoch": 0.17118467472169924, "grad_norm": 5.582895005836393, "learning_rate": 9.486449990909654e-06, "loss": 17.0059, "step": 9365 }, { "epoch": 0.17120295391814574, "grad_norm": 5.283504181308812, "learning_rate": 9.48631931124954e-06, "loss": 16.9713, "step": 9366 }, { "epoch": 0.17122123311459228, "grad_norm": 7.036891960760273, "learning_rate": 9.486188615865267e-06, "loss": 17.764, "step": 9367 }, { "epoch": 0.17123951231103882, "grad_norm": 7.0244422250085155, "learning_rate": 9.48605790475729e-06, "loss": 17.9067, "step": 9368 }, { "epoch": 0.17125779150748532, "grad_norm": 6.402158325323122, "learning_rate": 9.485927177926071e-06, "loss": 17.339, "step": 9369 }, { "epoch": 0.17127607070393186, "grad_norm": 6.943192297422052, "learning_rate": 9.485796435372066e-06, "loss": 17.7603, "step": 9370 }, { "epoch": 0.17129434990037837, "grad_norm": 6.398925418463297, "learning_rate": 9.485665677095733e-06, "loss": 17.2458, "step": 9371 }, { "epoch": 0.1713126290968249, "grad_norm": 6.856151315543031, "learning_rate": 9.48553490309753e-06, "loss": 17.853, "step": 9372 }, { "epoch": 0.17133090829327144, "grad_norm": 7.3519575505114565, "learning_rate": 9.48540411337792e-06, "loss": 17.97, "step": 9373 }, { "epoch": 0.17134918748971795, "grad_norm": 6.2059150149780615, "learning_rate": 9.485273307937354e-06, "loss": 17.2831, "step": 9374 }, { "epoch": 0.17136746668616448, "grad_norm": 5.751463733804113, "learning_rate": 9.485142486776297e-06, "loss": 17.1818, "step": 9375 }, { "epoch": 0.171385745882611, "grad_norm": 7.047700854948244, "learning_rate": 9.485011649895204e-06, "loss": 17.7665, "step": 9376 }, { "epoch": 0.17140402507905753, "grad_norm": 8.475999209283327, "learning_rate": 9.484880797294534e-06, "loss": 18.6278, "step": 9377 }, { "epoch": 0.17142230427550406, "grad_norm": 6.271246333842615, "learning_rate": 9.484749928974745e-06, "loss": 17.466, "step": 9378 }, { "epoch": 0.17144058347195057, "grad_norm": 7.504721163883052, "learning_rate": 9.4846190449363e-06, "loss": 17.8629, "step": 9379 }, { "epoch": 0.1714588626683971, "grad_norm": 6.753137748892236, "learning_rate": 9.48448814517965e-06, "loss": 17.3884, "step": 9380 }, { "epoch": 0.1714771418648436, "grad_norm": 6.683449785929558, "learning_rate": 9.484357229705262e-06, "loss": 17.3045, "step": 9381 }, { "epoch": 0.17149542106129015, "grad_norm": 7.038968690947826, "learning_rate": 9.48422629851359e-06, "loss": 17.6161, "step": 9382 }, { "epoch": 0.17151370025773666, "grad_norm": 8.246775361551709, "learning_rate": 9.484095351605093e-06, "loss": 18.5538, "step": 9383 }, { "epoch": 0.1715319794541832, "grad_norm": 6.04432556618661, "learning_rate": 9.48396438898023e-06, "loss": 17.5251, "step": 9384 }, { "epoch": 0.17155025865062973, "grad_norm": 6.775248989592338, "learning_rate": 9.483833410639465e-06, "loss": 17.7649, "step": 9385 }, { "epoch": 0.17156853784707624, "grad_norm": 5.488825339415775, "learning_rate": 9.483702416583249e-06, "loss": 17.0887, "step": 9386 }, { "epoch": 0.17158681704352277, "grad_norm": 6.663263419093369, "learning_rate": 9.483571406812046e-06, "loss": 17.3529, "step": 9387 }, { "epoch": 0.17160509623996928, "grad_norm": 6.451288086920941, "learning_rate": 9.483440381326316e-06, "loss": 17.446, "step": 9388 }, { "epoch": 0.17162337543641581, "grad_norm": 9.190972193048253, "learning_rate": 9.483309340126514e-06, "loss": 18.0486, "step": 9389 }, { "epoch": 0.17164165463286235, "grad_norm": 6.5989987104060726, "learning_rate": 9.483178283213104e-06, "loss": 17.6012, "step": 9390 }, { "epoch": 0.17165993382930886, "grad_norm": 6.328149633870934, "learning_rate": 9.483047210586542e-06, "loss": 17.3198, "step": 9391 }, { "epoch": 0.1716782130257554, "grad_norm": 7.2569753611456065, "learning_rate": 9.482916122247289e-06, "loss": 17.9867, "step": 9392 }, { "epoch": 0.1716964922222019, "grad_norm": 6.898388072866421, "learning_rate": 9.482785018195803e-06, "loss": 17.5655, "step": 9393 }, { "epoch": 0.17171477141864844, "grad_norm": 8.01711657514621, "learning_rate": 9.482653898432546e-06, "loss": 18.3204, "step": 9394 }, { "epoch": 0.17173305061509497, "grad_norm": 6.975439275024527, "learning_rate": 9.482522762957976e-06, "loss": 18.1094, "step": 9395 }, { "epoch": 0.17175132981154148, "grad_norm": 5.868700240209868, "learning_rate": 9.48239161177255e-06, "loss": 16.9796, "step": 9396 }, { "epoch": 0.17176960900798802, "grad_norm": 7.08908481341133, "learning_rate": 9.482260444876733e-06, "loss": 17.6141, "step": 9397 }, { "epoch": 0.17178788820443452, "grad_norm": 7.055610246006772, "learning_rate": 9.482129262270982e-06, "loss": 17.5318, "step": 9398 }, { "epoch": 0.17180616740088106, "grad_norm": 6.116674772768493, "learning_rate": 9.481998063955756e-06, "loss": 17.1327, "step": 9399 }, { "epoch": 0.17182444659732757, "grad_norm": 8.588323819381399, "learning_rate": 9.481866849931516e-06, "loss": 18.4417, "step": 9400 }, { "epoch": 0.1718427257937741, "grad_norm": 7.51140039685407, "learning_rate": 9.481735620198722e-06, "loss": 17.8579, "step": 9401 }, { "epoch": 0.17186100499022064, "grad_norm": 8.035682754298808, "learning_rate": 9.481604374757834e-06, "loss": 17.7268, "step": 9402 }, { "epoch": 0.17187928418666715, "grad_norm": 7.592674702990398, "learning_rate": 9.481473113609309e-06, "loss": 18.0118, "step": 9403 }, { "epoch": 0.17189756338311368, "grad_norm": 6.667093276838761, "learning_rate": 9.481341836753612e-06, "loss": 17.4662, "step": 9404 }, { "epoch": 0.1719158425795602, "grad_norm": 7.810607329528676, "learning_rate": 9.4812105441912e-06, "loss": 17.719, "step": 9405 }, { "epoch": 0.17193412177600673, "grad_norm": 7.194972914942562, "learning_rate": 9.481079235922534e-06, "loss": 17.5976, "step": 9406 }, { "epoch": 0.17195240097245326, "grad_norm": 7.9834727347623895, "learning_rate": 9.480947911948075e-06, "loss": 18.1629, "step": 9407 }, { "epoch": 0.17197068016889977, "grad_norm": 7.18606951996447, "learning_rate": 9.480816572268281e-06, "loss": 17.5883, "step": 9408 }, { "epoch": 0.1719889593653463, "grad_norm": 6.486718640880905, "learning_rate": 9.480685216883614e-06, "loss": 17.5277, "step": 9409 }, { "epoch": 0.1720072385617928, "grad_norm": 9.50867175041148, "learning_rate": 9.480553845794534e-06, "loss": 18.0472, "step": 9410 }, { "epoch": 0.17202551775823935, "grad_norm": 6.833479189201265, "learning_rate": 9.480422459001503e-06, "loss": 17.6381, "step": 9411 }, { "epoch": 0.17204379695468588, "grad_norm": 6.277692317050017, "learning_rate": 9.480291056504978e-06, "loss": 17.2588, "step": 9412 }, { "epoch": 0.1720620761511324, "grad_norm": 5.7551353372689995, "learning_rate": 9.480159638305424e-06, "loss": 17.2131, "step": 9413 }, { "epoch": 0.17208035534757893, "grad_norm": 8.692658198171147, "learning_rate": 9.480028204403298e-06, "loss": 17.8449, "step": 9414 }, { "epoch": 0.17209863454402544, "grad_norm": 7.435976881499505, "learning_rate": 9.47989675479906e-06, "loss": 17.9663, "step": 9415 }, { "epoch": 0.17211691374047197, "grad_norm": 8.665769778151954, "learning_rate": 9.479765289493176e-06, "loss": 18.0989, "step": 9416 }, { "epoch": 0.17213519293691848, "grad_norm": 7.548053517846902, "learning_rate": 9.479633808486103e-06, "loss": 17.9994, "step": 9417 }, { "epoch": 0.17215347213336502, "grad_norm": 8.24601842526659, "learning_rate": 9.4795023117783e-06, "loss": 17.7702, "step": 9418 }, { "epoch": 0.17217175132981155, "grad_norm": 7.3080893508518265, "learning_rate": 9.479370799370231e-06, "loss": 17.6346, "step": 9419 }, { "epoch": 0.17219003052625806, "grad_norm": 6.95859428350624, "learning_rate": 9.479239271262356e-06, "loss": 17.7272, "step": 9420 }, { "epoch": 0.1722083097227046, "grad_norm": 5.658747751909899, "learning_rate": 9.479107727455137e-06, "loss": 17.1958, "step": 9421 }, { "epoch": 0.1722265889191511, "grad_norm": 9.264521686672701, "learning_rate": 9.478976167949032e-06, "loss": 18.6736, "step": 9422 }, { "epoch": 0.17224486811559764, "grad_norm": 10.464266390240628, "learning_rate": 9.478844592744507e-06, "loss": 18.6533, "step": 9423 }, { "epoch": 0.17226314731204417, "grad_norm": 6.793778486617364, "learning_rate": 9.478713001842019e-06, "loss": 17.5827, "step": 9424 }, { "epoch": 0.17228142650849068, "grad_norm": 7.856395977325854, "learning_rate": 9.47858139524203e-06, "loss": 18.2891, "step": 9425 }, { "epoch": 0.17229970570493722, "grad_norm": 6.938032387697228, "learning_rate": 9.478449772945003e-06, "loss": 17.7025, "step": 9426 }, { "epoch": 0.17231798490138373, "grad_norm": 6.694615648790434, "learning_rate": 9.478318134951396e-06, "loss": 17.7233, "step": 9427 }, { "epoch": 0.17233626409783026, "grad_norm": 7.934515973309323, "learning_rate": 9.478186481261674e-06, "loss": 18.6778, "step": 9428 }, { "epoch": 0.1723545432942768, "grad_norm": 7.6012840022268655, "learning_rate": 9.478054811876298e-06, "loss": 17.8783, "step": 9429 }, { "epoch": 0.1723728224907233, "grad_norm": 7.09475782007339, "learning_rate": 9.477923126795727e-06, "loss": 17.8057, "step": 9430 }, { "epoch": 0.17239110168716984, "grad_norm": 6.954595742628623, "learning_rate": 9.477791426020425e-06, "loss": 17.8825, "step": 9431 }, { "epoch": 0.17240938088361635, "grad_norm": 6.6808165057065, "learning_rate": 9.477659709550852e-06, "loss": 17.6323, "step": 9432 }, { "epoch": 0.17242766008006288, "grad_norm": 9.677616218480246, "learning_rate": 9.477527977387471e-06, "loss": 18.3513, "step": 9433 }, { "epoch": 0.1724459392765094, "grad_norm": 6.898574913690863, "learning_rate": 9.477396229530742e-06, "loss": 17.7003, "step": 9434 }, { "epoch": 0.17246421847295593, "grad_norm": 6.22690980824051, "learning_rate": 9.477264465981128e-06, "loss": 17.5063, "step": 9435 }, { "epoch": 0.17248249766940246, "grad_norm": 6.551990444920954, "learning_rate": 9.477132686739091e-06, "loss": 17.5723, "step": 9436 }, { "epoch": 0.17250077686584897, "grad_norm": 8.222108731581205, "learning_rate": 9.477000891805092e-06, "loss": 18.1106, "step": 9437 }, { "epoch": 0.1725190560622955, "grad_norm": 7.472141826139331, "learning_rate": 9.476869081179595e-06, "loss": 17.926, "step": 9438 }, { "epoch": 0.17253733525874201, "grad_norm": 5.729838615705796, "learning_rate": 9.476737254863057e-06, "loss": 17.2853, "step": 9439 }, { "epoch": 0.17255561445518855, "grad_norm": 6.169816445120499, "learning_rate": 9.476605412855946e-06, "loss": 17.3952, "step": 9440 }, { "epoch": 0.17257389365163509, "grad_norm": 5.714087489923032, "learning_rate": 9.47647355515872e-06, "loss": 17.0376, "step": 9441 }, { "epoch": 0.1725921728480816, "grad_norm": 6.319413544978291, "learning_rate": 9.476341681771844e-06, "loss": 17.6697, "step": 9442 }, { "epoch": 0.17261045204452813, "grad_norm": 6.365535308271305, "learning_rate": 9.476209792695779e-06, "loss": 17.5086, "step": 9443 }, { "epoch": 0.17262873124097464, "grad_norm": 9.358700783372539, "learning_rate": 9.476077887930985e-06, "loss": 18.0325, "step": 9444 }, { "epoch": 0.17264701043742117, "grad_norm": 5.875412377637854, "learning_rate": 9.475945967477929e-06, "loss": 17.2611, "step": 9445 }, { "epoch": 0.1726652896338677, "grad_norm": 6.57373215116459, "learning_rate": 9.47581403133707e-06, "loss": 17.5664, "step": 9446 }, { "epoch": 0.17268356883031422, "grad_norm": 6.9318182688007655, "learning_rate": 9.47568207950887e-06, "loss": 17.4997, "step": 9447 }, { "epoch": 0.17270184802676075, "grad_norm": 8.41243281063665, "learning_rate": 9.475550111993794e-06, "loss": 17.8644, "step": 9448 }, { "epoch": 0.17272012722320726, "grad_norm": 7.471280300668058, "learning_rate": 9.475418128792302e-06, "loss": 17.8876, "step": 9449 }, { "epoch": 0.1727384064196538, "grad_norm": 6.601480469516368, "learning_rate": 9.47528612990486e-06, "loss": 17.4701, "step": 9450 }, { "epoch": 0.1727566856161003, "grad_norm": 6.810351201802684, "learning_rate": 9.475154115331926e-06, "loss": 17.5172, "step": 9451 }, { "epoch": 0.17277496481254684, "grad_norm": 7.398992952527537, "learning_rate": 9.475022085073967e-06, "loss": 17.4504, "step": 9452 }, { "epoch": 0.17279324400899337, "grad_norm": 6.6966327893105415, "learning_rate": 9.474890039131442e-06, "loss": 17.4509, "step": 9453 }, { "epoch": 0.17281152320543988, "grad_norm": 6.158766465461097, "learning_rate": 9.474757977504817e-06, "loss": 17.4434, "step": 9454 }, { "epoch": 0.17282980240188642, "grad_norm": 5.923134645722096, "learning_rate": 9.474625900194554e-06, "loss": 16.9824, "step": 9455 }, { "epoch": 0.17284808159833293, "grad_norm": 8.076136920082657, "learning_rate": 9.474493807201114e-06, "loss": 17.8831, "step": 9456 }, { "epoch": 0.17286636079477946, "grad_norm": 6.456655976023691, "learning_rate": 9.474361698524963e-06, "loss": 17.6123, "step": 9457 }, { "epoch": 0.172884639991226, "grad_norm": 7.792694797980782, "learning_rate": 9.474229574166563e-06, "loss": 17.6647, "step": 9458 }, { "epoch": 0.1729029191876725, "grad_norm": 7.571500525823527, "learning_rate": 9.474097434126374e-06, "loss": 18.2985, "step": 9459 }, { "epoch": 0.17292119838411904, "grad_norm": 7.8823129226813275, "learning_rate": 9.473965278404865e-06, "loss": 17.6927, "step": 9460 }, { "epoch": 0.17293947758056555, "grad_norm": 6.901785326814127, "learning_rate": 9.473833107002492e-06, "loss": 17.8228, "step": 9461 }, { "epoch": 0.17295775677701208, "grad_norm": 9.092246095613532, "learning_rate": 9.473700919919725e-06, "loss": 18.1414, "step": 9462 }, { "epoch": 0.17297603597345862, "grad_norm": 8.048377244864216, "learning_rate": 9.473568717157024e-06, "loss": 18.1686, "step": 9463 }, { "epoch": 0.17299431516990513, "grad_norm": 6.299790719426887, "learning_rate": 9.473436498714852e-06, "loss": 17.4108, "step": 9464 }, { "epoch": 0.17301259436635166, "grad_norm": 7.137264906670612, "learning_rate": 9.473304264593674e-06, "loss": 17.9108, "step": 9465 }, { "epoch": 0.17303087356279817, "grad_norm": 6.39898306603595, "learning_rate": 9.473172014793953e-06, "loss": 17.4791, "step": 9466 }, { "epoch": 0.1730491527592447, "grad_norm": 6.967386160536522, "learning_rate": 9.473039749316152e-06, "loss": 17.5312, "step": 9467 }, { "epoch": 0.17306743195569121, "grad_norm": 6.762288020691088, "learning_rate": 9.472907468160735e-06, "loss": 17.6392, "step": 9468 }, { "epoch": 0.17308571115213775, "grad_norm": 8.540444264241714, "learning_rate": 9.472775171328165e-06, "loss": 18.0354, "step": 9469 }, { "epoch": 0.17310399034858429, "grad_norm": 6.765161338247924, "learning_rate": 9.472642858818906e-06, "loss": 17.7281, "step": 9470 }, { "epoch": 0.1731222695450308, "grad_norm": 6.962275288338839, "learning_rate": 9.472510530633422e-06, "loss": 17.6537, "step": 9471 }, { "epoch": 0.17314054874147733, "grad_norm": 7.049348562423112, "learning_rate": 9.472378186772177e-06, "loss": 17.7875, "step": 9472 }, { "epoch": 0.17315882793792384, "grad_norm": 6.193548539422179, "learning_rate": 9.472245827235636e-06, "loss": 17.3468, "step": 9473 }, { "epoch": 0.17317710713437037, "grad_norm": 7.164603951954371, "learning_rate": 9.472113452024258e-06, "loss": 17.9715, "step": 9474 }, { "epoch": 0.1731953863308169, "grad_norm": 6.968068689021553, "learning_rate": 9.471981061138513e-06, "loss": 17.7948, "step": 9475 }, { "epoch": 0.17321366552726342, "grad_norm": 7.232139235254524, "learning_rate": 9.471848654578862e-06, "loss": 17.7792, "step": 9476 }, { "epoch": 0.17323194472370995, "grad_norm": 5.9023763429473215, "learning_rate": 9.471716232345769e-06, "loss": 16.9879, "step": 9477 }, { "epoch": 0.17325022392015646, "grad_norm": 5.738475405414184, "learning_rate": 9.4715837944397e-06, "loss": 17.0774, "step": 9478 }, { "epoch": 0.173268503116603, "grad_norm": 6.112365319319647, "learning_rate": 9.471451340861117e-06, "loss": 17.4933, "step": 9479 }, { "epoch": 0.17328678231304953, "grad_norm": 6.886066210165729, "learning_rate": 9.471318871610484e-06, "loss": 17.8008, "step": 9480 }, { "epoch": 0.17330506150949604, "grad_norm": 7.979041327998414, "learning_rate": 9.471186386688267e-06, "loss": 17.9677, "step": 9481 }, { "epoch": 0.17332334070594257, "grad_norm": 6.447605600130551, "learning_rate": 9.47105388609493e-06, "loss": 17.4334, "step": 9482 }, { "epoch": 0.17334161990238908, "grad_norm": 6.919822594374885, "learning_rate": 9.470921369830938e-06, "loss": 17.4449, "step": 9483 }, { "epoch": 0.17335989909883562, "grad_norm": 6.224254864865902, "learning_rate": 9.470788837896753e-06, "loss": 17.3345, "step": 9484 }, { "epoch": 0.17337817829528213, "grad_norm": 7.83572911688027, "learning_rate": 9.470656290292842e-06, "loss": 17.9033, "step": 9485 }, { "epoch": 0.17339645749172866, "grad_norm": 6.013585901590195, "learning_rate": 9.47052372701967e-06, "loss": 17.1984, "step": 9486 }, { "epoch": 0.1734147366881752, "grad_norm": 6.787471336236899, "learning_rate": 9.470391148077697e-06, "loss": 17.2764, "step": 9487 }, { "epoch": 0.1734330158846217, "grad_norm": 8.49541940305247, "learning_rate": 9.470258553467392e-06, "loss": 18.336, "step": 9488 }, { "epoch": 0.17345129508106824, "grad_norm": 7.359129457025921, "learning_rate": 9.47012594318922e-06, "loss": 17.8962, "step": 9489 }, { "epoch": 0.17346957427751475, "grad_norm": 5.905825614079544, "learning_rate": 9.469993317243644e-06, "loss": 17.2531, "step": 9490 }, { "epoch": 0.17348785347396128, "grad_norm": 6.661236621727768, "learning_rate": 9.469860675631129e-06, "loss": 17.4225, "step": 9491 }, { "epoch": 0.17350613267040782, "grad_norm": 7.970349098170724, "learning_rate": 9.46972801835214e-06, "loss": 18.0449, "step": 9492 }, { "epoch": 0.17352441186685433, "grad_norm": 7.870137417874784, "learning_rate": 9.469595345407144e-06, "loss": 18.2065, "step": 9493 }, { "epoch": 0.17354269106330086, "grad_norm": 6.630693665009491, "learning_rate": 9.4694626567966e-06, "loss": 17.6834, "step": 9494 }, { "epoch": 0.17356097025974737, "grad_norm": 6.727202801770572, "learning_rate": 9.46932995252098e-06, "loss": 17.6419, "step": 9495 }, { "epoch": 0.1735792494561939, "grad_norm": 8.627751500853709, "learning_rate": 9.469197232580747e-06, "loss": 18.1795, "step": 9496 }, { "epoch": 0.17359752865264044, "grad_norm": 6.179486784072806, "learning_rate": 9.469064496976365e-06, "loss": 17.31, "step": 9497 }, { "epoch": 0.17361580784908695, "grad_norm": 6.334591869376551, "learning_rate": 9.468931745708298e-06, "loss": 17.6099, "step": 9498 }, { "epoch": 0.1736340870455335, "grad_norm": 6.316203075348294, "learning_rate": 9.468798978777016e-06, "loss": 17.4841, "step": 9499 }, { "epoch": 0.17365236624198, "grad_norm": 7.7296442146211515, "learning_rate": 9.46866619618298e-06, "loss": 17.8273, "step": 9500 }, { "epoch": 0.17367064543842653, "grad_norm": 6.517640989987836, "learning_rate": 9.468533397926656e-06, "loss": 17.6511, "step": 9501 }, { "epoch": 0.17368892463487304, "grad_norm": 6.486360811626599, "learning_rate": 9.46840058400851e-06, "loss": 17.7592, "step": 9502 }, { "epoch": 0.17370720383131957, "grad_norm": 6.973484127279262, "learning_rate": 9.468267754429009e-06, "loss": 17.5944, "step": 9503 }, { "epoch": 0.1737254830277661, "grad_norm": 6.580408787595043, "learning_rate": 9.468134909188616e-06, "loss": 17.7031, "step": 9504 }, { "epoch": 0.17374376222421262, "grad_norm": 6.121295434799951, "learning_rate": 9.468002048287799e-06, "loss": 17.4001, "step": 9505 }, { "epoch": 0.17376204142065915, "grad_norm": 6.64922992160366, "learning_rate": 9.467869171727022e-06, "loss": 17.7164, "step": 9506 }, { "epoch": 0.17378032061710566, "grad_norm": 6.549658495995932, "learning_rate": 9.467736279506752e-06, "loss": 17.4104, "step": 9507 }, { "epoch": 0.1737985998135522, "grad_norm": 6.197963987146198, "learning_rate": 9.467603371627454e-06, "loss": 17.3903, "step": 9508 }, { "epoch": 0.17381687900999873, "grad_norm": 6.181427883663454, "learning_rate": 9.467470448089592e-06, "loss": 17.346, "step": 9509 }, { "epoch": 0.17383515820644524, "grad_norm": 6.575355283380181, "learning_rate": 9.467337508893635e-06, "loss": 17.5846, "step": 9510 }, { "epoch": 0.17385343740289178, "grad_norm": 7.123198600785866, "learning_rate": 9.467204554040049e-06, "loss": 18.1258, "step": 9511 }, { "epoch": 0.17387171659933828, "grad_norm": 6.802118401190757, "learning_rate": 9.467071583529297e-06, "loss": 17.6492, "step": 9512 }, { "epoch": 0.17388999579578482, "grad_norm": 6.764335831864584, "learning_rate": 9.466938597361847e-06, "loss": 17.4252, "step": 9513 }, { "epoch": 0.17390827499223135, "grad_norm": 6.252125581799929, "learning_rate": 9.466805595538165e-06, "loss": 17.5834, "step": 9514 }, { "epoch": 0.17392655418867786, "grad_norm": 7.274545612028333, "learning_rate": 9.466672578058718e-06, "loss": 17.6257, "step": 9515 }, { "epoch": 0.1739448333851244, "grad_norm": 7.550294008739678, "learning_rate": 9.46653954492397e-06, "loss": 17.8097, "step": 9516 }, { "epoch": 0.1739631125815709, "grad_norm": 6.783902071701367, "learning_rate": 9.466406496134388e-06, "loss": 17.7032, "step": 9517 }, { "epoch": 0.17398139177801744, "grad_norm": 7.286554500889945, "learning_rate": 9.466273431690439e-06, "loss": 17.752, "step": 9518 }, { "epoch": 0.17399967097446395, "grad_norm": 6.522243344605513, "learning_rate": 9.46614035159259e-06, "loss": 17.6034, "step": 9519 }, { "epoch": 0.17401795017091048, "grad_norm": 6.418133639420691, "learning_rate": 9.466007255841307e-06, "loss": 17.4132, "step": 9520 }, { "epoch": 0.17403622936735702, "grad_norm": 6.754508075717153, "learning_rate": 9.465874144437055e-06, "loss": 17.8215, "step": 9521 }, { "epoch": 0.17405450856380353, "grad_norm": 6.2240970765795085, "learning_rate": 9.465741017380302e-06, "loss": 17.3865, "step": 9522 }, { "epoch": 0.17407278776025006, "grad_norm": 7.749530890247584, "learning_rate": 9.465607874671514e-06, "loss": 17.9878, "step": 9523 }, { "epoch": 0.17409106695669657, "grad_norm": 7.70501705038296, "learning_rate": 9.465474716311159e-06, "loss": 18.3077, "step": 9524 }, { "epoch": 0.1741093461531431, "grad_norm": 6.65303915166089, "learning_rate": 9.465341542299701e-06, "loss": 17.6386, "step": 9525 }, { "epoch": 0.17412762534958964, "grad_norm": 5.461389075851484, "learning_rate": 9.46520835263761e-06, "loss": 17.0258, "step": 9526 }, { "epoch": 0.17414590454603615, "grad_norm": 9.381998953111543, "learning_rate": 9.46507514732535e-06, "loss": 17.5598, "step": 9527 }, { "epoch": 0.1741641837424827, "grad_norm": 6.1220603773567, "learning_rate": 9.464941926363389e-06, "loss": 17.4067, "step": 9528 }, { "epoch": 0.1741824629389292, "grad_norm": 8.00793554449915, "learning_rate": 9.464808689752195e-06, "loss": 18.0463, "step": 9529 }, { "epoch": 0.17420074213537573, "grad_norm": 6.465493988527969, "learning_rate": 9.464675437492234e-06, "loss": 17.4025, "step": 9530 }, { "epoch": 0.17421902133182227, "grad_norm": 6.306562954623618, "learning_rate": 9.464542169583972e-06, "loss": 17.3987, "step": 9531 }, { "epoch": 0.17423730052826877, "grad_norm": 8.160215439993134, "learning_rate": 9.464408886027878e-06, "loss": 18.1336, "step": 9532 }, { "epoch": 0.1742555797247153, "grad_norm": 6.695224799762931, "learning_rate": 9.464275586824418e-06, "loss": 17.6448, "step": 9533 }, { "epoch": 0.17427385892116182, "grad_norm": 6.977009153648582, "learning_rate": 9.46414227197406e-06, "loss": 17.7975, "step": 9534 }, { "epoch": 0.17429213811760835, "grad_norm": 7.331561066680906, "learning_rate": 9.46400894147727e-06, "loss": 17.8779, "step": 9535 }, { "epoch": 0.17431041731405486, "grad_norm": 7.011303994873853, "learning_rate": 9.463875595334516e-06, "loss": 17.9372, "step": 9536 }, { "epoch": 0.1743286965105014, "grad_norm": 6.169869422341589, "learning_rate": 9.463742233546264e-06, "loss": 17.5423, "step": 9537 }, { "epoch": 0.17434697570694793, "grad_norm": 7.389420360269115, "learning_rate": 9.463608856112985e-06, "loss": 18.075, "step": 9538 }, { "epoch": 0.17436525490339444, "grad_norm": 8.262990116786577, "learning_rate": 9.463475463035143e-06, "loss": 18.1106, "step": 9539 }, { "epoch": 0.17438353409984098, "grad_norm": 7.2006562511719, "learning_rate": 9.463342054313207e-06, "loss": 17.6868, "step": 9540 }, { "epoch": 0.17440181329628748, "grad_norm": 7.840608118510198, "learning_rate": 9.463208629947645e-06, "loss": 18.3659, "step": 9541 }, { "epoch": 0.17442009249273402, "grad_norm": 7.0299308033934755, "learning_rate": 9.463075189938925e-06, "loss": 17.4907, "step": 9542 }, { "epoch": 0.17443837168918055, "grad_norm": 6.323297700497119, "learning_rate": 9.462941734287511e-06, "loss": 17.3497, "step": 9543 }, { "epoch": 0.17445665088562706, "grad_norm": 7.9047948424288315, "learning_rate": 9.462808262993876e-06, "loss": 17.917, "step": 9544 }, { "epoch": 0.1744749300820736, "grad_norm": 6.179598644734943, "learning_rate": 9.462674776058485e-06, "loss": 17.4775, "step": 9545 }, { "epoch": 0.1744932092785201, "grad_norm": 6.596936136388797, "learning_rate": 9.462541273481804e-06, "loss": 17.3298, "step": 9546 }, { "epoch": 0.17451148847496664, "grad_norm": 7.149311211802416, "learning_rate": 9.462407755264305e-06, "loss": 17.6487, "step": 9547 }, { "epoch": 0.17452976767141318, "grad_norm": 6.735124720416647, "learning_rate": 9.462274221406455e-06, "loss": 17.4208, "step": 9548 }, { "epoch": 0.17454804686785969, "grad_norm": 7.670894500045217, "learning_rate": 9.46214067190872e-06, "loss": 18.224, "step": 9549 }, { "epoch": 0.17456632606430622, "grad_norm": 6.435070996214837, "learning_rate": 9.462007106771569e-06, "loss": 17.6639, "step": 9550 }, { "epoch": 0.17458460526075273, "grad_norm": 7.6112651650811305, "learning_rate": 9.461873525995469e-06, "loss": 18.2247, "step": 9551 }, { "epoch": 0.17460288445719926, "grad_norm": 5.044347889863651, "learning_rate": 9.461739929580892e-06, "loss": 17.024, "step": 9552 }, { "epoch": 0.17462116365364577, "grad_norm": 6.5749082393182645, "learning_rate": 9.4616063175283e-06, "loss": 17.595, "step": 9553 }, { "epoch": 0.1746394428500923, "grad_norm": 6.841947019941837, "learning_rate": 9.46147268983817e-06, "loss": 17.6326, "step": 9554 }, { "epoch": 0.17465772204653884, "grad_norm": 7.120305404120127, "learning_rate": 9.461339046510962e-06, "loss": 17.5067, "step": 9555 }, { "epoch": 0.17467600124298535, "grad_norm": 9.627379721584031, "learning_rate": 9.461205387547147e-06, "loss": 18.5417, "step": 9556 }, { "epoch": 0.1746942804394319, "grad_norm": 6.590698216359157, "learning_rate": 9.461071712947197e-06, "loss": 17.5086, "step": 9557 }, { "epoch": 0.1747125596358784, "grad_norm": 6.823081760392698, "learning_rate": 9.460938022711576e-06, "loss": 17.5671, "step": 9558 }, { "epoch": 0.17473083883232493, "grad_norm": 7.28494667014169, "learning_rate": 9.460804316840755e-06, "loss": 18.4391, "step": 9559 }, { "epoch": 0.17474911802877147, "grad_norm": 7.1721623372429555, "learning_rate": 9.460670595335201e-06, "loss": 17.804, "step": 9560 }, { "epoch": 0.17476739722521797, "grad_norm": 7.984601933362222, "learning_rate": 9.460536858195387e-06, "loss": 18.2357, "step": 9561 }, { "epoch": 0.1747856764216645, "grad_norm": 6.3102168532318395, "learning_rate": 9.460403105421773e-06, "loss": 17.2541, "step": 9562 }, { "epoch": 0.17480395561811102, "grad_norm": 7.554736078456946, "learning_rate": 9.460269337014838e-06, "loss": 17.9618, "step": 9563 }, { "epoch": 0.17482223481455755, "grad_norm": 8.529445122710069, "learning_rate": 9.460135552975043e-06, "loss": 18.1476, "step": 9564 }, { "epoch": 0.1748405140110041, "grad_norm": 7.224102814581794, "learning_rate": 9.460001753302862e-06, "loss": 17.9554, "step": 9565 }, { "epoch": 0.1748587932074506, "grad_norm": 7.694910481548016, "learning_rate": 9.459867937998762e-06, "loss": 18.3189, "step": 9566 }, { "epoch": 0.17487707240389713, "grad_norm": 6.902715343961525, "learning_rate": 9.45973410706321e-06, "loss": 17.6001, "step": 9567 }, { "epoch": 0.17489535160034364, "grad_norm": 6.689873324000622, "learning_rate": 9.459600260496677e-06, "loss": 17.6243, "step": 9568 }, { "epoch": 0.17491363079679018, "grad_norm": 6.086483769488931, "learning_rate": 9.459466398299632e-06, "loss": 17.2564, "step": 9569 }, { "epoch": 0.17493190999323668, "grad_norm": 7.063496404856035, "learning_rate": 9.459332520472546e-06, "loss": 17.6633, "step": 9570 }, { "epoch": 0.17495018918968322, "grad_norm": 7.943562445239964, "learning_rate": 9.459198627015885e-06, "loss": 18.2078, "step": 9571 }, { "epoch": 0.17496846838612976, "grad_norm": 6.494236879534126, "learning_rate": 9.45906471793012e-06, "loss": 17.5818, "step": 9572 }, { "epoch": 0.17498674758257626, "grad_norm": 7.441500171931855, "learning_rate": 9.458930793215722e-06, "loss": 17.849, "step": 9573 }, { "epoch": 0.1750050267790228, "grad_norm": 8.228828669558796, "learning_rate": 9.458796852873155e-06, "loss": 18.0843, "step": 9574 }, { "epoch": 0.1750233059754693, "grad_norm": 5.825673737747694, "learning_rate": 9.458662896902895e-06, "loss": 17.0919, "step": 9575 }, { "epoch": 0.17504158517191584, "grad_norm": 5.872710126464134, "learning_rate": 9.458528925305406e-06, "loss": 17.2865, "step": 9576 }, { "epoch": 0.17505986436836238, "grad_norm": 7.611633683494937, "learning_rate": 9.458394938081162e-06, "loss": 17.8971, "step": 9577 }, { "epoch": 0.17507814356480889, "grad_norm": 7.063929080650453, "learning_rate": 9.45826093523063e-06, "loss": 17.736, "step": 9578 }, { "epoch": 0.17509642276125542, "grad_norm": 7.791055831721408, "learning_rate": 9.45812691675428e-06, "loss": 18.0464, "step": 9579 }, { "epoch": 0.17511470195770193, "grad_norm": 7.736108451433705, "learning_rate": 9.457992882652583e-06, "loss": 17.5614, "step": 9580 }, { "epoch": 0.17513298115414846, "grad_norm": 8.217265916851911, "learning_rate": 9.457858832926006e-06, "loss": 17.9376, "step": 9581 }, { "epoch": 0.175151260350595, "grad_norm": 6.331160305006264, "learning_rate": 9.457724767575023e-06, "loss": 17.4408, "step": 9582 }, { "epoch": 0.1751695395470415, "grad_norm": 6.254197417031652, "learning_rate": 9.4575906866001e-06, "loss": 17.1966, "step": 9583 }, { "epoch": 0.17518781874348804, "grad_norm": 7.363918482060166, "learning_rate": 9.457456590001708e-06, "loss": 17.5009, "step": 9584 }, { "epoch": 0.17520609793993455, "grad_norm": 6.023205168853669, "learning_rate": 9.457322477780317e-06, "loss": 17.2705, "step": 9585 }, { "epoch": 0.1752243771363811, "grad_norm": 6.668531385943409, "learning_rate": 9.457188349936399e-06, "loss": 17.4821, "step": 9586 }, { "epoch": 0.1752426563328276, "grad_norm": 6.677434109572615, "learning_rate": 9.457054206470422e-06, "loss": 17.4897, "step": 9587 }, { "epoch": 0.17526093552927413, "grad_norm": 7.421678870093581, "learning_rate": 9.456920047382858e-06, "loss": 17.7544, "step": 9588 }, { "epoch": 0.17527921472572067, "grad_norm": 5.749753796678464, "learning_rate": 9.456785872674174e-06, "loss": 17.1499, "step": 9589 }, { "epoch": 0.17529749392216717, "grad_norm": 7.657753373935576, "learning_rate": 9.456651682344844e-06, "loss": 17.8947, "step": 9590 }, { "epoch": 0.1753157731186137, "grad_norm": 5.800114173360619, "learning_rate": 9.456517476395334e-06, "loss": 17.5072, "step": 9591 }, { "epoch": 0.17533405231506022, "grad_norm": 6.370407507874773, "learning_rate": 9.45638325482612e-06, "loss": 17.6061, "step": 9592 }, { "epoch": 0.17535233151150675, "grad_norm": 7.54027055496554, "learning_rate": 9.456249017637669e-06, "loss": 18.0063, "step": 9593 }, { "epoch": 0.1753706107079533, "grad_norm": 7.614861521245855, "learning_rate": 9.45611476483045e-06, "loss": 17.9974, "step": 9594 }, { "epoch": 0.1753888899043998, "grad_norm": 7.161606441548655, "learning_rate": 9.455980496404937e-06, "loss": 18.0735, "step": 9595 }, { "epoch": 0.17540716910084633, "grad_norm": 8.015904801234518, "learning_rate": 9.4558462123616e-06, "loss": 18.0219, "step": 9596 }, { "epoch": 0.17542544829729284, "grad_norm": 6.314264808075519, "learning_rate": 9.455711912700906e-06, "loss": 17.5404, "step": 9597 }, { "epoch": 0.17544372749373938, "grad_norm": 6.389007170985312, "learning_rate": 9.455577597423329e-06, "loss": 17.4354, "step": 9598 }, { "epoch": 0.1754620066901859, "grad_norm": 6.450687917607619, "learning_rate": 9.45544326652934e-06, "loss": 17.6394, "step": 9599 }, { "epoch": 0.17548028588663242, "grad_norm": 8.174249896841113, "learning_rate": 9.455308920019407e-06, "loss": 17.6692, "step": 9600 }, { "epoch": 0.17549856508307896, "grad_norm": 6.553016425433632, "learning_rate": 9.455174557894006e-06, "loss": 17.7826, "step": 9601 }, { "epoch": 0.17551684427952546, "grad_norm": 6.90399626900844, "learning_rate": 9.455040180153602e-06, "loss": 17.644, "step": 9602 }, { "epoch": 0.175535123475972, "grad_norm": 7.13345054726534, "learning_rate": 9.454905786798671e-06, "loss": 17.8096, "step": 9603 }, { "epoch": 0.1755534026724185, "grad_norm": 6.901085836989342, "learning_rate": 9.454771377829682e-06, "loss": 17.8616, "step": 9604 }, { "epoch": 0.17557168186886504, "grad_norm": 6.407392162682347, "learning_rate": 9.454636953247106e-06, "loss": 17.5621, "step": 9605 }, { "epoch": 0.17558996106531158, "grad_norm": 8.909637325449719, "learning_rate": 9.454502513051411e-06, "loss": 18.2019, "step": 9606 }, { "epoch": 0.1756082402617581, "grad_norm": 7.75470214126141, "learning_rate": 9.454368057243072e-06, "loss": 17.8469, "step": 9607 }, { "epoch": 0.17562651945820462, "grad_norm": 6.488640748696882, "learning_rate": 9.454233585822562e-06, "loss": 17.6242, "step": 9608 }, { "epoch": 0.17564479865465113, "grad_norm": 7.347397929283839, "learning_rate": 9.454099098790348e-06, "loss": 17.8359, "step": 9609 }, { "epoch": 0.17566307785109767, "grad_norm": 7.888743786396532, "learning_rate": 9.453964596146902e-06, "loss": 18.2884, "step": 9610 }, { "epoch": 0.1756813570475442, "grad_norm": 8.862293313268275, "learning_rate": 9.453830077892699e-06, "loss": 18.3987, "step": 9611 }, { "epoch": 0.1756996362439907, "grad_norm": 7.377641513426715, "learning_rate": 9.453695544028207e-06, "loss": 17.9834, "step": 9612 }, { "epoch": 0.17571791544043724, "grad_norm": 9.07862999594866, "learning_rate": 9.453560994553899e-06, "loss": 18.696, "step": 9613 }, { "epoch": 0.17573619463688375, "grad_norm": 6.545419816988638, "learning_rate": 9.453426429470247e-06, "loss": 17.328, "step": 9614 }, { "epoch": 0.1757544738333303, "grad_norm": 6.713530668856103, "learning_rate": 9.45329184877772e-06, "loss": 17.2036, "step": 9615 }, { "epoch": 0.17577275302977682, "grad_norm": 5.675338688006315, "learning_rate": 9.453157252476793e-06, "loss": 17.1771, "step": 9616 }, { "epoch": 0.17579103222622333, "grad_norm": 7.9498118466028185, "learning_rate": 9.453022640567936e-06, "loss": 17.8481, "step": 9617 }, { "epoch": 0.17580931142266987, "grad_norm": 7.229448778468172, "learning_rate": 9.45288801305162e-06, "loss": 17.6368, "step": 9618 }, { "epoch": 0.17582759061911638, "grad_norm": 6.703388893560162, "learning_rate": 9.452753369928318e-06, "loss": 17.4984, "step": 9619 }, { "epoch": 0.1758458698155629, "grad_norm": 6.976151557876985, "learning_rate": 9.452618711198503e-06, "loss": 17.7493, "step": 9620 }, { "epoch": 0.17586414901200942, "grad_norm": 6.700961464767145, "learning_rate": 9.452484036862644e-06, "loss": 17.6641, "step": 9621 }, { "epoch": 0.17588242820845595, "grad_norm": 7.089096875826108, "learning_rate": 9.452349346921217e-06, "loss": 17.7229, "step": 9622 }, { "epoch": 0.1759007074049025, "grad_norm": 6.745625988494399, "learning_rate": 9.45221464137469e-06, "loss": 17.4635, "step": 9623 }, { "epoch": 0.175918986601349, "grad_norm": 5.519624368429729, "learning_rate": 9.452079920223538e-06, "loss": 16.9726, "step": 9624 }, { "epoch": 0.17593726579779553, "grad_norm": 7.556663734793128, "learning_rate": 9.451945183468232e-06, "loss": 17.9782, "step": 9625 }, { "epoch": 0.17595554499424204, "grad_norm": 7.05912937539355, "learning_rate": 9.451810431109245e-06, "loss": 17.5649, "step": 9626 }, { "epoch": 0.17597382419068858, "grad_norm": 7.26119786780201, "learning_rate": 9.451675663147049e-06, "loss": 17.8877, "step": 9627 }, { "epoch": 0.1759921033871351, "grad_norm": 6.14689326735572, "learning_rate": 9.451540879582115e-06, "loss": 17.3651, "step": 9628 }, { "epoch": 0.17601038258358162, "grad_norm": 7.051044815148934, "learning_rate": 9.451406080414915e-06, "loss": 17.7112, "step": 9629 }, { "epoch": 0.17602866178002816, "grad_norm": 8.696622023622663, "learning_rate": 9.451271265645925e-06, "loss": 17.8157, "step": 9630 }, { "epoch": 0.17604694097647466, "grad_norm": 7.096216250790805, "learning_rate": 9.451136435275617e-06, "loss": 17.6581, "step": 9631 }, { "epoch": 0.1760652201729212, "grad_norm": 6.879831978724544, "learning_rate": 9.45100158930446e-06, "loss": 17.526, "step": 9632 }, { "epoch": 0.17608349936936774, "grad_norm": 6.42182044924456, "learning_rate": 9.450866727732929e-06, "loss": 17.5388, "step": 9633 }, { "epoch": 0.17610177856581424, "grad_norm": 6.712998064872013, "learning_rate": 9.450731850561496e-06, "loss": 17.6062, "step": 9634 }, { "epoch": 0.17612005776226078, "grad_norm": 7.162659350706177, "learning_rate": 9.450596957790635e-06, "loss": 17.6215, "step": 9635 }, { "epoch": 0.1761383369587073, "grad_norm": 8.155756487703615, "learning_rate": 9.450462049420816e-06, "loss": 18.093, "step": 9636 }, { "epoch": 0.17615661615515382, "grad_norm": 7.107062681470219, "learning_rate": 9.450327125452517e-06, "loss": 17.442, "step": 9637 }, { "epoch": 0.17617489535160033, "grad_norm": 6.436645919508923, "learning_rate": 9.450192185886205e-06, "loss": 17.4097, "step": 9638 }, { "epoch": 0.17619317454804687, "grad_norm": 7.073010915110433, "learning_rate": 9.450057230722356e-06, "loss": 17.6839, "step": 9639 }, { "epoch": 0.1762114537444934, "grad_norm": 6.3032993215199635, "learning_rate": 9.449922259961443e-06, "loss": 17.4623, "step": 9640 }, { "epoch": 0.1762297329409399, "grad_norm": 6.099549268477369, "learning_rate": 9.44978727360394e-06, "loss": 17.335, "step": 9641 }, { "epoch": 0.17624801213738645, "grad_norm": 7.786219841755767, "learning_rate": 9.449652271650314e-06, "loss": 18.2596, "step": 9642 }, { "epoch": 0.17626629133383295, "grad_norm": 5.731132304582805, "learning_rate": 9.449517254101048e-06, "loss": 17.0984, "step": 9643 }, { "epoch": 0.1762845705302795, "grad_norm": 9.083475202687568, "learning_rate": 9.449382220956607e-06, "loss": 18.6494, "step": 9644 }, { "epoch": 0.17630284972672602, "grad_norm": 6.359743207189475, "learning_rate": 9.449247172217468e-06, "loss": 17.3785, "step": 9645 }, { "epoch": 0.17632112892317253, "grad_norm": 7.051513680981001, "learning_rate": 9.449112107884105e-06, "loss": 17.505, "step": 9646 }, { "epoch": 0.17633940811961907, "grad_norm": 6.2166941084379985, "learning_rate": 9.448977027956989e-06, "loss": 17.4986, "step": 9647 }, { "epoch": 0.17635768731606558, "grad_norm": 7.336685680841691, "learning_rate": 9.448841932436596e-06, "loss": 17.8284, "step": 9648 }, { "epoch": 0.1763759665125121, "grad_norm": 7.702246485336523, "learning_rate": 9.448706821323396e-06, "loss": 17.7406, "step": 9649 }, { "epoch": 0.17639424570895865, "grad_norm": 6.4258054404145355, "learning_rate": 9.448571694617868e-06, "loss": 17.4923, "step": 9650 }, { "epoch": 0.17641252490540515, "grad_norm": 9.007529205106438, "learning_rate": 9.448436552320479e-06, "loss": 18.8918, "step": 9651 }, { "epoch": 0.1764308041018517, "grad_norm": 8.425281287035055, "learning_rate": 9.448301394431707e-06, "loss": 18.4124, "step": 9652 }, { "epoch": 0.1764490832982982, "grad_norm": 8.206058412032595, "learning_rate": 9.448166220952025e-06, "loss": 18.5093, "step": 9653 }, { "epoch": 0.17646736249474473, "grad_norm": 9.808163777383681, "learning_rate": 9.448031031881905e-06, "loss": 18.3022, "step": 9654 }, { "epoch": 0.17648564169119124, "grad_norm": 6.733960562872665, "learning_rate": 9.447895827221822e-06, "loss": 17.687, "step": 9655 }, { "epoch": 0.17650392088763778, "grad_norm": 8.096405222635829, "learning_rate": 9.447760606972252e-06, "loss": 18.2036, "step": 9656 }, { "epoch": 0.1765222000840843, "grad_norm": 7.329142019655432, "learning_rate": 9.447625371133667e-06, "loss": 17.9571, "step": 9657 }, { "epoch": 0.17654047928053082, "grad_norm": 6.581840834842928, "learning_rate": 9.44749011970654e-06, "loss": 17.5703, "step": 9658 }, { "epoch": 0.17655875847697736, "grad_norm": 6.129898632277393, "learning_rate": 9.447354852691345e-06, "loss": 17.1393, "step": 9659 }, { "epoch": 0.17657703767342386, "grad_norm": 6.762291526337835, "learning_rate": 9.44721957008856e-06, "loss": 17.4261, "step": 9660 }, { "epoch": 0.1765953168698704, "grad_norm": 6.813084022090381, "learning_rate": 9.447084271898654e-06, "loss": 17.7524, "step": 9661 }, { "epoch": 0.17661359606631694, "grad_norm": 7.259022069175019, "learning_rate": 9.446948958122105e-06, "loss": 17.8218, "step": 9662 }, { "epoch": 0.17663187526276344, "grad_norm": 6.345286917442418, "learning_rate": 9.446813628759385e-06, "loss": 17.5285, "step": 9663 }, { "epoch": 0.17665015445920998, "grad_norm": 7.507549321072424, "learning_rate": 9.44667828381097e-06, "loss": 17.8682, "step": 9664 }, { "epoch": 0.1766684336556565, "grad_norm": 8.854101984446455, "learning_rate": 9.446542923277334e-06, "loss": 18.0953, "step": 9665 }, { "epoch": 0.17668671285210302, "grad_norm": 7.0324185514608075, "learning_rate": 9.446407547158948e-06, "loss": 17.8778, "step": 9666 }, { "epoch": 0.17670499204854956, "grad_norm": 6.7879172787870825, "learning_rate": 9.44627215545629e-06, "loss": 17.4351, "step": 9667 }, { "epoch": 0.17672327124499607, "grad_norm": 8.513656505846203, "learning_rate": 9.446136748169836e-06, "loss": 18.1137, "step": 9668 }, { "epoch": 0.1767415504414426, "grad_norm": 7.215523316095467, "learning_rate": 9.446001325300058e-06, "loss": 17.8333, "step": 9669 }, { "epoch": 0.1767598296378891, "grad_norm": 8.158464109641324, "learning_rate": 9.445865886847429e-06, "loss": 18.5054, "step": 9670 }, { "epoch": 0.17677810883433565, "grad_norm": 6.687987689542999, "learning_rate": 9.445730432812429e-06, "loss": 17.5526, "step": 9671 }, { "epoch": 0.17679638803078215, "grad_norm": 6.478660305319019, "learning_rate": 9.445594963195529e-06, "loss": 17.5868, "step": 9672 }, { "epoch": 0.1768146672272287, "grad_norm": 5.151104796398375, "learning_rate": 9.445459477997203e-06, "loss": 16.9636, "step": 9673 }, { "epoch": 0.17683294642367522, "grad_norm": 7.433461809978908, "learning_rate": 9.445323977217927e-06, "loss": 18.2122, "step": 9674 }, { "epoch": 0.17685122562012173, "grad_norm": 7.8410617284588255, "learning_rate": 9.445188460858176e-06, "loss": 17.9256, "step": 9675 }, { "epoch": 0.17686950481656827, "grad_norm": 7.321948538376839, "learning_rate": 9.445052928918428e-06, "loss": 17.8209, "step": 9676 }, { "epoch": 0.17688778401301478, "grad_norm": 6.376730731198681, "learning_rate": 9.444917381399153e-06, "loss": 17.3759, "step": 9677 }, { "epoch": 0.1769060632094613, "grad_norm": 7.316377252451896, "learning_rate": 9.444781818300828e-06, "loss": 18.0705, "step": 9678 }, { "epoch": 0.17692434240590785, "grad_norm": 7.027882068028113, "learning_rate": 9.444646239623929e-06, "loss": 17.7918, "step": 9679 }, { "epoch": 0.17694262160235436, "grad_norm": 7.850501897469477, "learning_rate": 9.444510645368932e-06, "loss": 17.9935, "step": 9680 }, { "epoch": 0.1769609007988009, "grad_norm": 7.419180565871145, "learning_rate": 9.444375035536309e-06, "loss": 17.9222, "step": 9681 }, { "epoch": 0.1769791799952474, "grad_norm": 6.110589969967948, "learning_rate": 9.444239410126538e-06, "loss": 17.4264, "step": 9682 }, { "epoch": 0.17699745919169393, "grad_norm": 6.622212880277883, "learning_rate": 9.444103769140094e-06, "loss": 17.6152, "step": 9683 }, { "epoch": 0.17701573838814047, "grad_norm": 7.350055275318429, "learning_rate": 9.44396811257745e-06, "loss": 18.2224, "step": 9684 }, { "epoch": 0.17703401758458698, "grad_norm": 5.899957783709529, "learning_rate": 9.443832440439084e-06, "loss": 17.0868, "step": 9685 }, { "epoch": 0.1770522967810335, "grad_norm": 9.15888569103452, "learning_rate": 9.443696752725473e-06, "loss": 18.4873, "step": 9686 }, { "epoch": 0.17707057597748002, "grad_norm": 7.1445693175578775, "learning_rate": 9.443561049437089e-06, "loss": 17.8865, "step": 9687 }, { "epoch": 0.17708885517392656, "grad_norm": 8.27896390971788, "learning_rate": 9.44342533057441e-06, "loss": 17.904, "step": 9688 }, { "epoch": 0.17710713437037306, "grad_norm": 7.739589589633317, "learning_rate": 9.443289596137909e-06, "loss": 17.9802, "step": 9689 }, { "epoch": 0.1771254135668196, "grad_norm": 6.871999460302412, "learning_rate": 9.443153846128063e-06, "loss": 17.5567, "step": 9690 }, { "epoch": 0.17714369276326614, "grad_norm": 6.700836374363081, "learning_rate": 9.443018080545352e-06, "loss": 17.5755, "step": 9691 }, { "epoch": 0.17716197195971264, "grad_norm": 6.385486423826585, "learning_rate": 9.442882299390246e-06, "loss": 17.4786, "step": 9692 }, { "epoch": 0.17718025115615918, "grad_norm": 7.213933951038367, "learning_rate": 9.442746502663223e-06, "loss": 17.8697, "step": 9693 }, { "epoch": 0.1771985303526057, "grad_norm": 7.214876349743183, "learning_rate": 9.442610690364758e-06, "loss": 17.9732, "step": 9694 }, { "epoch": 0.17721680954905222, "grad_norm": 6.9425163586911225, "learning_rate": 9.44247486249533e-06, "loss": 17.7903, "step": 9695 }, { "epoch": 0.17723508874549876, "grad_norm": 6.178478150841416, "learning_rate": 9.442339019055412e-06, "loss": 17.3213, "step": 9696 }, { "epoch": 0.17725336794194527, "grad_norm": 6.278235189723746, "learning_rate": 9.442203160045482e-06, "loss": 17.541, "step": 9697 }, { "epoch": 0.1772716471383918, "grad_norm": 6.115087524574453, "learning_rate": 9.442067285466014e-06, "loss": 17.7573, "step": 9698 }, { "epoch": 0.1772899263348383, "grad_norm": 7.899599701776045, "learning_rate": 9.441931395317488e-06, "loss": 18.0926, "step": 9699 }, { "epoch": 0.17730820553128485, "grad_norm": 6.094800422486613, "learning_rate": 9.441795489600374e-06, "loss": 17.2838, "step": 9700 }, { "epoch": 0.17732648472773138, "grad_norm": 8.220077362304531, "learning_rate": 9.441659568315156e-06, "loss": 18.1649, "step": 9701 }, { "epoch": 0.1773447639241779, "grad_norm": 8.180979102384269, "learning_rate": 9.441523631462306e-06, "loss": 18.0576, "step": 9702 }, { "epoch": 0.17736304312062443, "grad_norm": 6.821931812911939, "learning_rate": 9.4413876790423e-06, "loss": 17.7972, "step": 9703 }, { "epoch": 0.17738132231707093, "grad_norm": 9.030951775341116, "learning_rate": 9.441251711055616e-06, "loss": 18.5217, "step": 9704 }, { "epoch": 0.17739960151351747, "grad_norm": 6.0484269852928225, "learning_rate": 9.44111572750273e-06, "loss": 17.4727, "step": 9705 }, { "epoch": 0.17741788070996398, "grad_norm": 7.721039215305776, "learning_rate": 9.440979728384118e-06, "loss": 18.0058, "step": 9706 }, { "epoch": 0.1774361599064105, "grad_norm": 6.295985288778256, "learning_rate": 9.440843713700258e-06, "loss": 17.2709, "step": 9707 }, { "epoch": 0.17745443910285705, "grad_norm": 7.319272852063073, "learning_rate": 9.440707683451627e-06, "loss": 17.9808, "step": 9708 }, { "epoch": 0.17747271829930356, "grad_norm": 9.102364113732065, "learning_rate": 9.4405716376387e-06, "loss": 17.7096, "step": 9709 }, { "epoch": 0.1774909974957501, "grad_norm": 6.809521540271753, "learning_rate": 9.440435576261957e-06, "loss": 17.5893, "step": 9710 }, { "epoch": 0.1775092766921966, "grad_norm": 6.852650002439833, "learning_rate": 9.44029949932187e-06, "loss": 17.7249, "step": 9711 }, { "epoch": 0.17752755588864313, "grad_norm": 6.526419833756175, "learning_rate": 9.440163406818919e-06, "loss": 17.6222, "step": 9712 }, { "epoch": 0.17754583508508967, "grad_norm": 6.898101318285359, "learning_rate": 9.44002729875358e-06, "loss": 17.765, "step": 9713 }, { "epoch": 0.17756411428153618, "grad_norm": 6.788442053561319, "learning_rate": 9.439891175126331e-06, "loss": 17.6916, "step": 9714 }, { "epoch": 0.17758239347798271, "grad_norm": 6.906235154036778, "learning_rate": 9.43975503593765e-06, "loss": 17.5038, "step": 9715 }, { "epoch": 0.17760067267442922, "grad_norm": 7.042972173507783, "learning_rate": 9.439618881188014e-06, "loss": 17.6661, "step": 9716 }, { "epoch": 0.17761895187087576, "grad_norm": 7.19275145236644, "learning_rate": 9.439482710877896e-06, "loss": 17.5986, "step": 9717 }, { "epoch": 0.1776372310673223, "grad_norm": 7.130931651391098, "learning_rate": 9.439346525007777e-06, "loss": 17.9464, "step": 9718 }, { "epoch": 0.1776555102637688, "grad_norm": 6.67146969845684, "learning_rate": 9.439210323578134e-06, "loss": 17.3904, "step": 9719 }, { "epoch": 0.17767378946021534, "grad_norm": 7.465071058551338, "learning_rate": 9.439074106589445e-06, "loss": 17.5445, "step": 9720 }, { "epoch": 0.17769206865666184, "grad_norm": 7.690613516101903, "learning_rate": 9.438937874042185e-06, "loss": 17.9147, "step": 9721 }, { "epoch": 0.17771034785310838, "grad_norm": 6.656617422977974, "learning_rate": 9.438801625936832e-06, "loss": 17.6754, "step": 9722 }, { "epoch": 0.1777286270495549, "grad_norm": 7.680997168893982, "learning_rate": 9.438665362273868e-06, "loss": 17.6818, "step": 9723 }, { "epoch": 0.17774690624600142, "grad_norm": 6.51239015634626, "learning_rate": 9.438529083053765e-06, "loss": 17.5307, "step": 9724 }, { "epoch": 0.17776518544244796, "grad_norm": 6.603219062765521, "learning_rate": 9.438392788277002e-06, "loss": 17.8635, "step": 9725 }, { "epoch": 0.17778346463889447, "grad_norm": 7.559472034188811, "learning_rate": 9.438256477944058e-06, "loss": 17.8341, "step": 9726 }, { "epoch": 0.177801743835341, "grad_norm": 7.86013382771788, "learning_rate": 9.438120152055413e-06, "loss": 18.0118, "step": 9727 }, { "epoch": 0.1778200230317875, "grad_norm": 6.620496532263314, "learning_rate": 9.437983810611537e-06, "loss": 17.5389, "step": 9728 }, { "epoch": 0.17783830222823405, "grad_norm": 7.8524726290862175, "learning_rate": 9.437847453612916e-06, "loss": 18.0236, "step": 9729 }, { "epoch": 0.17785658142468058, "grad_norm": 6.803781577666788, "learning_rate": 9.437711081060024e-06, "loss": 17.722, "step": 9730 }, { "epoch": 0.1778748606211271, "grad_norm": 6.804629365353588, "learning_rate": 9.437574692953339e-06, "loss": 17.4993, "step": 9731 }, { "epoch": 0.17789313981757363, "grad_norm": 6.2524073121621715, "learning_rate": 9.437438289293342e-06, "loss": 17.3258, "step": 9732 }, { "epoch": 0.17791141901402013, "grad_norm": 6.462773309750215, "learning_rate": 9.437301870080507e-06, "loss": 17.9451, "step": 9733 }, { "epoch": 0.17792969821046667, "grad_norm": 6.460433370820935, "learning_rate": 9.437165435315315e-06, "loss": 17.2655, "step": 9734 }, { "epoch": 0.1779479774069132, "grad_norm": 7.436405726537002, "learning_rate": 9.437028984998242e-06, "loss": 17.7149, "step": 9735 }, { "epoch": 0.1779662566033597, "grad_norm": 6.176887549546213, "learning_rate": 9.436892519129767e-06, "loss": 17.4163, "step": 9736 }, { "epoch": 0.17798453579980625, "grad_norm": 7.730634578467468, "learning_rate": 9.436756037710371e-06, "loss": 17.4026, "step": 9737 }, { "epoch": 0.17800281499625276, "grad_norm": 5.161988822903628, "learning_rate": 9.436619540740528e-06, "loss": 16.8931, "step": 9738 }, { "epoch": 0.1780210941926993, "grad_norm": 7.344605573317709, "learning_rate": 9.436483028220719e-06, "loss": 17.7463, "step": 9739 }, { "epoch": 0.1780393733891458, "grad_norm": 8.046813266600296, "learning_rate": 9.436346500151423e-06, "loss": 17.9651, "step": 9740 }, { "epoch": 0.17805765258559234, "grad_norm": 6.393431035630223, "learning_rate": 9.436209956533117e-06, "loss": 17.5466, "step": 9741 }, { "epoch": 0.17807593178203887, "grad_norm": 6.994112434580161, "learning_rate": 9.436073397366282e-06, "loss": 17.9602, "step": 9742 }, { "epoch": 0.17809421097848538, "grad_norm": 7.571745779934617, "learning_rate": 9.435936822651391e-06, "loss": 17.9859, "step": 9743 }, { "epoch": 0.17811249017493191, "grad_norm": 7.051017052383482, "learning_rate": 9.435800232388927e-06, "loss": 17.8538, "step": 9744 }, { "epoch": 0.17813076937137842, "grad_norm": 7.086992505485541, "learning_rate": 9.43566362657937e-06, "loss": 17.9082, "step": 9745 }, { "epoch": 0.17814904856782496, "grad_norm": 7.575001476737331, "learning_rate": 9.435527005223197e-06, "loss": 18.3628, "step": 9746 }, { "epoch": 0.1781673277642715, "grad_norm": 6.620470467188751, "learning_rate": 9.435390368320885e-06, "loss": 17.7196, "step": 9747 }, { "epoch": 0.178185606960718, "grad_norm": 6.767458937127244, "learning_rate": 9.435253715872917e-06, "loss": 17.5436, "step": 9748 }, { "epoch": 0.17820388615716454, "grad_norm": 7.768517323398791, "learning_rate": 9.435117047879768e-06, "loss": 17.5803, "step": 9749 }, { "epoch": 0.17822216535361105, "grad_norm": 8.52002998348253, "learning_rate": 9.434980364341917e-06, "loss": 18.5091, "step": 9750 }, { "epoch": 0.17824044455005758, "grad_norm": 8.60681587905685, "learning_rate": 9.434843665259847e-06, "loss": 18.4451, "step": 9751 }, { "epoch": 0.17825872374650412, "grad_norm": 6.936536731140007, "learning_rate": 9.434706950634034e-06, "loss": 17.6692, "step": 9752 }, { "epoch": 0.17827700294295062, "grad_norm": 7.080582829257614, "learning_rate": 9.434570220464959e-06, "loss": 17.5401, "step": 9753 }, { "epoch": 0.17829528213939716, "grad_norm": 6.9923608229850815, "learning_rate": 9.434433474753098e-06, "loss": 17.8878, "step": 9754 }, { "epoch": 0.17831356133584367, "grad_norm": 8.90585203556734, "learning_rate": 9.434296713498934e-06, "loss": 18.6246, "step": 9755 }, { "epoch": 0.1783318405322902, "grad_norm": 7.578519220791576, "learning_rate": 9.434159936702943e-06, "loss": 17.9341, "step": 9756 }, { "epoch": 0.1783501197287367, "grad_norm": 6.728053978855922, "learning_rate": 9.434023144365608e-06, "loss": 17.7731, "step": 9757 }, { "epoch": 0.17836839892518325, "grad_norm": 7.003711526559738, "learning_rate": 9.433886336487407e-06, "loss": 17.5275, "step": 9758 }, { "epoch": 0.17838667812162978, "grad_norm": 8.455193694248191, "learning_rate": 9.433749513068818e-06, "loss": 18.0235, "step": 9759 }, { "epoch": 0.1784049573180763, "grad_norm": 5.342623445358985, "learning_rate": 9.433612674110322e-06, "loss": 16.9604, "step": 9760 }, { "epoch": 0.17842323651452283, "grad_norm": 7.356764788393615, "learning_rate": 9.433475819612399e-06, "loss": 17.9372, "step": 9761 }, { "epoch": 0.17844151571096933, "grad_norm": 7.828348541507714, "learning_rate": 9.433338949575527e-06, "loss": 17.8288, "step": 9762 }, { "epoch": 0.17845979490741587, "grad_norm": 6.98720409664005, "learning_rate": 9.433202064000187e-06, "loss": 17.7981, "step": 9763 }, { "epoch": 0.1784780741038624, "grad_norm": 6.788120270191315, "learning_rate": 9.433065162886859e-06, "loss": 17.7682, "step": 9764 }, { "epoch": 0.1784963533003089, "grad_norm": 5.223666450826554, "learning_rate": 9.432928246236022e-06, "loss": 16.8469, "step": 9765 }, { "epoch": 0.17851463249675545, "grad_norm": 7.726231461511586, "learning_rate": 9.432791314048156e-06, "loss": 18.1611, "step": 9766 }, { "epoch": 0.17853291169320196, "grad_norm": 6.963165020375716, "learning_rate": 9.432654366323741e-06, "loss": 17.8006, "step": 9767 }, { "epoch": 0.1785511908896485, "grad_norm": 5.360391003965571, "learning_rate": 9.432517403063257e-06, "loss": 17.1445, "step": 9768 }, { "epoch": 0.17856947008609503, "grad_norm": 6.51934534987101, "learning_rate": 9.432380424267185e-06, "loss": 17.8409, "step": 9769 }, { "epoch": 0.17858774928254154, "grad_norm": 6.2131845811506095, "learning_rate": 9.432243429936003e-06, "loss": 17.5967, "step": 9770 }, { "epoch": 0.17860602847898807, "grad_norm": 6.490373216880616, "learning_rate": 9.432106420070193e-06, "loss": 17.188, "step": 9771 }, { "epoch": 0.17862430767543458, "grad_norm": 7.772721584194353, "learning_rate": 9.431969394670235e-06, "loss": 18.0158, "step": 9772 }, { "epoch": 0.17864258687188111, "grad_norm": 6.6291600442599625, "learning_rate": 9.431832353736608e-06, "loss": 17.5063, "step": 9773 }, { "epoch": 0.17866086606832762, "grad_norm": 6.337764937297904, "learning_rate": 9.431695297269794e-06, "loss": 17.4012, "step": 9774 }, { "epoch": 0.17867914526477416, "grad_norm": 5.528648977290598, "learning_rate": 9.431558225270272e-06, "loss": 17.1467, "step": 9775 }, { "epoch": 0.1786974244612207, "grad_norm": 8.17410182241707, "learning_rate": 9.431421137738523e-06, "loss": 18.0794, "step": 9776 }, { "epoch": 0.1787157036576672, "grad_norm": 7.257810407204982, "learning_rate": 9.431284034675029e-06, "loss": 17.6139, "step": 9777 }, { "epoch": 0.17873398285411374, "grad_norm": 7.471120028201146, "learning_rate": 9.431146916080267e-06, "loss": 17.7815, "step": 9778 }, { "epoch": 0.17875226205056025, "grad_norm": 6.23432741622461, "learning_rate": 9.431009781954721e-06, "loss": 17.3911, "step": 9779 }, { "epoch": 0.17877054124700678, "grad_norm": 8.843764130326498, "learning_rate": 9.430872632298868e-06, "loss": 18.4642, "step": 9780 }, { "epoch": 0.17878882044345332, "grad_norm": 6.205504168993172, "learning_rate": 9.430735467113192e-06, "loss": 17.344, "step": 9781 }, { "epoch": 0.17880709963989982, "grad_norm": 8.572140095617657, "learning_rate": 9.430598286398174e-06, "loss": 18.2109, "step": 9782 }, { "epoch": 0.17882537883634636, "grad_norm": 9.004655976115545, "learning_rate": 9.430461090154293e-06, "loss": 18.4639, "step": 9783 }, { "epoch": 0.17884365803279287, "grad_norm": 8.06569636279287, "learning_rate": 9.43032387838203e-06, "loss": 18.4043, "step": 9784 }, { "epoch": 0.1788619372292394, "grad_norm": 6.832525538322095, "learning_rate": 9.430186651081865e-06, "loss": 17.5653, "step": 9785 }, { "epoch": 0.17888021642568594, "grad_norm": 7.490859644683718, "learning_rate": 9.430049408254282e-06, "loss": 18.0886, "step": 9786 }, { "epoch": 0.17889849562213245, "grad_norm": 7.045513759459739, "learning_rate": 9.429912149899758e-06, "loss": 17.5878, "step": 9787 }, { "epoch": 0.17891677481857898, "grad_norm": 8.059792695285463, "learning_rate": 9.429774876018779e-06, "loss": 18.1043, "step": 9788 }, { "epoch": 0.1789350540150255, "grad_norm": 6.0787364962803645, "learning_rate": 9.429637586611822e-06, "loss": 17.3551, "step": 9789 }, { "epoch": 0.17895333321147203, "grad_norm": 4.943654424937247, "learning_rate": 9.42950028167937e-06, "loss": 16.8844, "step": 9790 }, { "epoch": 0.17897161240791853, "grad_norm": 6.14956192712607, "learning_rate": 9.429362961221904e-06, "loss": 17.6326, "step": 9791 }, { "epoch": 0.17898989160436507, "grad_norm": 8.847278040251075, "learning_rate": 9.429225625239906e-06, "loss": 18.5878, "step": 9792 }, { "epoch": 0.1790081708008116, "grad_norm": 10.696567532179113, "learning_rate": 9.429088273733855e-06, "loss": 18.5805, "step": 9793 }, { "epoch": 0.1790264499972581, "grad_norm": 6.926405386042904, "learning_rate": 9.428950906704234e-06, "loss": 17.7401, "step": 9794 }, { "epoch": 0.17904472919370465, "grad_norm": 6.489936352810286, "learning_rate": 9.428813524151525e-06, "loss": 17.5293, "step": 9795 }, { "epoch": 0.17906300839015116, "grad_norm": 6.964940518291793, "learning_rate": 9.428676126076208e-06, "loss": 17.8764, "step": 9796 }, { "epoch": 0.1790812875865977, "grad_norm": 7.295689182900891, "learning_rate": 9.428538712478767e-06, "loss": 17.7729, "step": 9797 }, { "epoch": 0.17909956678304423, "grad_norm": 7.437427660905084, "learning_rate": 9.428401283359682e-06, "loss": 17.875, "step": 9798 }, { "epoch": 0.17911784597949074, "grad_norm": 7.679807104368676, "learning_rate": 9.428263838719434e-06, "loss": 17.7642, "step": 9799 }, { "epoch": 0.17913612517593727, "grad_norm": 6.1585842593847895, "learning_rate": 9.428126378558506e-06, "loss": 17.3325, "step": 9800 }, { "epoch": 0.17915440437238378, "grad_norm": 7.7743560244330405, "learning_rate": 9.427988902877378e-06, "loss": 17.3411, "step": 9801 }, { "epoch": 0.17917268356883032, "grad_norm": 6.914870919299745, "learning_rate": 9.427851411676535e-06, "loss": 17.7374, "step": 9802 }, { "epoch": 0.17919096276527685, "grad_norm": 7.269947505575387, "learning_rate": 9.427713904956455e-06, "loss": 17.6458, "step": 9803 }, { "epoch": 0.17920924196172336, "grad_norm": 7.7465526450080375, "learning_rate": 9.427576382717624e-06, "loss": 18.127, "step": 9804 }, { "epoch": 0.1792275211581699, "grad_norm": 7.438328267476282, "learning_rate": 9.427438844960521e-06, "loss": 17.9759, "step": 9805 }, { "epoch": 0.1792458003546164, "grad_norm": 6.431880029486637, "learning_rate": 9.42730129168563e-06, "loss": 17.5455, "step": 9806 }, { "epoch": 0.17926407955106294, "grad_norm": 7.497073928030626, "learning_rate": 9.42716372289343e-06, "loss": 17.7965, "step": 9807 }, { "epoch": 0.17928235874750945, "grad_norm": 6.6020967923882345, "learning_rate": 9.427026138584408e-06, "loss": 17.5642, "step": 9808 }, { "epoch": 0.17930063794395598, "grad_norm": 7.76060879963446, "learning_rate": 9.426888538759042e-06, "loss": 17.9669, "step": 9809 }, { "epoch": 0.17931891714040252, "grad_norm": 7.807309750568793, "learning_rate": 9.426750923417815e-06, "loss": 18.3616, "step": 9810 }, { "epoch": 0.17933719633684903, "grad_norm": 7.828851331167055, "learning_rate": 9.42661329256121e-06, "loss": 18.0805, "step": 9811 }, { "epoch": 0.17935547553329556, "grad_norm": 6.59582425984953, "learning_rate": 9.426475646189713e-06, "loss": 17.4784, "step": 9812 }, { "epoch": 0.17937375472974207, "grad_norm": 7.099462090867203, "learning_rate": 9.426337984303799e-06, "loss": 17.8802, "step": 9813 }, { "epoch": 0.1793920339261886, "grad_norm": 8.122302428401728, "learning_rate": 9.426200306903957e-06, "loss": 18.4685, "step": 9814 }, { "epoch": 0.17941031312263514, "grad_norm": 6.179400084084576, "learning_rate": 9.426062613990667e-06, "loss": 17.3515, "step": 9815 }, { "epoch": 0.17942859231908165, "grad_norm": 7.911103376841772, "learning_rate": 9.42592490556441e-06, "loss": 18.0463, "step": 9816 }, { "epoch": 0.17944687151552818, "grad_norm": 7.205215188326355, "learning_rate": 9.425787181625671e-06, "loss": 17.8049, "step": 9817 }, { "epoch": 0.1794651507119747, "grad_norm": 6.062865053906306, "learning_rate": 9.425649442174933e-06, "loss": 17.1905, "step": 9818 }, { "epoch": 0.17948342990842123, "grad_norm": 7.380725263172451, "learning_rate": 9.425511687212677e-06, "loss": 18.1633, "step": 9819 }, { "epoch": 0.17950170910486776, "grad_norm": 6.652388341966013, "learning_rate": 9.425373916739384e-06, "loss": 17.4928, "step": 9820 }, { "epoch": 0.17951998830131427, "grad_norm": 6.9587652781673555, "learning_rate": 9.425236130755544e-06, "loss": 17.7109, "step": 9821 }, { "epoch": 0.1795382674977608, "grad_norm": 6.35310895058361, "learning_rate": 9.425098329261632e-06, "loss": 17.3162, "step": 9822 }, { "epoch": 0.17955654669420731, "grad_norm": 7.297330906963598, "learning_rate": 9.424960512258136e-06, "loss": 18.0997, "step": 9823 }, { "epoch": 0.17957482589065385, "grad_norm": 6.775515192904784, "learning_rate": 9.424822679745536e-06, "loss": 17.8178, "step": 9824 }, { "epoch": 0.17959310508710036, "grad_norm": 9.758082204390167, "learning_rate": 9.424684831724318e-06, "loss": 18.8286, "step": 9825 }, { "epoch": 0.1796113842835469, "grad_norm": 7.322342902333123, "learning_rate": 9.424546968194963e-06, "loss": 17.5736, "step": 9826 }, { "epoch": 0.17962966347999343, "grad_norm": 5.823542098051173, "learning_rate": 9.424409089157955e-06, "loss": 17.1623, "step": 9827 }, { "epoch": 0.17964794267643994, "grad_norm": 7.162051981471785, "learning_rate": 9.424271194613776e-06, "loss": 17.82, "step": 9828 }, { "epoch": 0.17966622187288647, "grad_norm": 6.903782555726466, "learning_rate": 9.424133284562911e-06, "loss": 17.8435, "step": 9829 }, { "epoch": 0.17968450106933298, "grad_norm": 7.52178146204798, "learning_rate": 9.423995359005844e-06, "loss": 17.5393, "step": 9830 }, { "epoch": 0.17970278026577952, "grad_norm": 6.0192047068100765, "learning_rate": 9.423857417943057e-06, "loss": 17.2282, "step": 9831 }, { "epoch": 0.17972105946222605, "grad_norm": 6.631329068271743, "learning_rate": 9.423719461375031e-06, "loss": 17.7655, "step": 9832 }, { "epoch": 0.17973933865867256, "grad_norm": 5.770385109698054, "learning_rate": 9.423581489302255e-06, "loss": 17.3378, "step": 9833 }, { "epoch": 0.1797576178551191, "grad_norm": 6.433071754790578, "learning_rate": 9.423443501725209e-06, "loss": 17.3947, "step": 9834 }, { "epoch": 0.1797758970515656, "grad_norm": 7.123725461827745, "learning_rate": 9.423305498644376e-06, "loss": 17.3041, "step": 9835 }, { "epoch": 0.17979417624801214, "grad_norm": 6.843623403109756, "learning_rate": 9.423167480060242e-06, "loss": 17.6027, "step": 9836 }, { "epoch": 0.17981245544445867, "grad_norm": 6.312874700147576, "learning_rate": 9.423029445973291e-06, "loss": 17.2771, "step": 9837 }, { "epoch": 0.17983073464090518, "grad_norm": 6.790106535741972, "learning_rate": 9.422891396384004e-06, "loss": 17.3934, "step": 9838 }, { "epoch": 0.17984901383735172, "grad_norm": 6.414037164648971, "learning_rate": 9.422753331292867e-06, "loss": 17.6646, "step": 9839 }, { "epoch": 0.17986729303379823, "grad_norm": 7.2004252066756615, "learning_rate": 9.422615250700363e-06, "loss": 17.9489, "step": 9840 }, { "epoch": 0.17988557223024476, "grad_norm": 7.387366300328245, "learning_rate": 9.422477154606978e-06, "loss": 17.9723, "step": 9841 }, { "epoch": 0.17990385142669127, "grad_norm": 6.858661119487742, "learning_rate": 9.422339043013192e-06, "loss": 17.6167, "step": 9842 }, { "epoch": 0.1799221306231378, "grad_norm": 6.1255287752906336, "learning_rate": 9.422200915919493e-06, "loss": 17.2448, "step": 9843 }, { "epoch": 0.17994040981958434, "grad_norm": 6.7525066962578935, "learning_rate": 9.422062773326361e-06, "loss": 17.758, "step": 9844 }, { "epoch": 0.17995868901603085, "grad_norm": 7.1877365319482855, "learning_rate": 9.421924615234286e-06, "loss": 17.9013, "step": 9845 }, { "epoch": 0.17997696821247738, "grad_norm": 8.481912440458515, "learning_rate": 9.421786441643748e-06, "loss": 18.6036, "step": 9846 }, { "epoch": 0.1799952474089239, "grad_norm": 6.284005726091458, "learning_rate": 9.42164825255523e-06, "loss": 17.2517, "step": 9847 }, { "epoch": 0.18001352660537043, "grad_norm": 6.2068072418850395, "learning_rate": 9.421510047969223e-06, "loss": 17.4497, "step": 9848 }, { "epoch": 0.18003180580181696, "grad_norm": 6.416446149254776, "learning_rate": 9.421371827886203e-06, "loss": 17.3684, "step": 9849 }, { "epoch": 0.18005008499826347, "grad_norm": 6.15143758420953, "learning_rate": 9.42123359230666e-06, "loss": 17.264, "step": 9850 }, { "epoch": 0.18006836419471, "grad_norm": 7.432611704220462, "learning_rate": 9.421095341231077e-06, "loss": 17.6573, "step": 9851 }, { "epoch": 0.18008664339115651, "grad_norm": 6.965229914547005, "learning_rate": 9.420957074659938e-06, "loss": 18.2525, "step": 9852 }, { "epoch": 0.18010492258760305, "grad_norm": 6.079438368269971, "learning_rate": 9.420818792593729e-06, "loss": 17.3922, "step": 9853 }, { "epoch": 0.18012320178404959, "grad_norm": 7.949627215604142, "learning_rate": 9.420680495032932e-06, "loss": 18.1077, "step": 9854 }, { "epoch": 0.1801414809804961, "grad_norm": 6.430165678416542, "learning_rate": 9.420542181978034e-06, "loss": 17.4384, "step": 9855 }, { "epoch": 0.18015976017694263, "grad_norm": 8.106001315581224, "learning_rate": 9.42040385342952e-06, "loss": 18.1259, "step": 9856 }, { "epoch": 0.18017803937338914, "grad_norm": 6.931273081965224, "learning_rate": 9.420265509387874e-06, "loss": 17.6603, "step": 9857 }, { "epoch": 0.18019631856983567, "grad_norm": 7.860322040698806, "learning_rate": 9.420127149853581e-06, "loss": 17.6595, "step": 9858 }, { "epoch": 0.18021459776628218, "grad_norm": 7.522948316284845, "learning_rate": 9.419988774827126e-06, "loss": 17.9882, "step": 9859 }, { "epoch": 0.18023287696272872, "grad_norm": 6.3089213056183535, "learning_rate": 9.419850384308993e-06, "loss": 17.4897, "step": 9860 }, { "epoch": 0.18025115615917525, "grad_norm": 7.5995078159913225, "learning_rate": 9.419711978299668e-06, "loss": 17.979, "step": 9861 }, { "epoch": 0.18026943535562176, "grad_norm": 6.3541903338947066, "learning_rate": 9.419573556799637e-06, "loss": 17.4696, "step": 9862 }, { "epoch": 0.1802877145520683, "grad_norm": 6.597222063316642, "learning_rate": 9.419435119809384e-06, "loss": 17.7096, "step": 9863 }, { "epoch": 0.1803059937485148, "grad_norm": 6.657552047121865, "learning_rate": 9.419296667329394e-06, "loss": 17.4063, "step": 9864 }, { "epoch": 0.18032427294496134, "grad_norm": 6.959719643856069, "learning_rate": 9.419158199360153e-06, "loss": 17.5825, "step": 9865 }, { "epoch": 0.18034255214140787, "grad_norm": 6.589167108452301, "learning_rate": 9.419019715902146e-06, "loss": 17.6088, "step": 9866 }, { "epoch": 0.18036083133785438, "grad_norm": 6.289589791858059, "learning_rate": 9.418881216955858e-06, "loss": 17.3522, "step": 9867 }, { "epoch": 0.18037911053430092, "grad_norm": 6.72399211882093, "learning_rate": 9.418742702521774e-06, "loss": 17.2417, "step": 9868 }, { "epoch": 0.18039738973074743, "grad_norm": 7.565165408967926, "learning_rate": 9.418604172600382e-06, "loss": 17.848, "step": 9869 }, { "epoch": 0.18041566892719396, "grad_norm": 5.029258380477208, "learning_rate": 9.418465627192165e-06, "loss": 16.9919, "step": 9870 }, { "epoch": 0.1804339481236405, "grad_norm": 6.991090099572182, "learning_rate": 9.41832706629761e-06, "loss": 17.768, "step": 9871 }, { "epoch": 0.180452227320087, "grad_norm": 6.551253626491615, "learning_rate": 9.418188489917202e-06, "loss": 17.4717, "step": 9872 }, { "epoch": 0.18047050651653354, "grad_norm": 5.915775501322046, "learning_rate": 9.418049898051425e-06, "loss": 17.1038, "step": 9873 }, { "epoch": 0.18048878571298005, "grad_norm": 7.9635352828765225, "learning_rate": 9.417911290700767e-06, "loss": 18.2166, "step": 9874 }, { "epoch": 0.18050706490942658, "grad_norm": 7.690572860183641, "learning_rate": 9.417772667865714e-06, "loss": 18.1678, "step": 9875 }, { "epoch": 0.1805253441058731, "grad_norm": 6.588647620562926, "learning_rate": 9.417634029546751e-06, "loss": 17.3728, "step": 9876 }, { "epoch": 0.18054362330231963, "grad_norm": 6.904329940733219, "learning_rate": 9.417495375744365e-06, "loss": 17.7944, "step": 9877 }, { "epoch": 0.18056190249876616, "grad_norm": 5.317146962442455, "learning_rate": 9.41735670645904e-06, "loss": 16.975, "step": 9878 }, { "epoch": 0.18058018169521267, "grad_norm": 7.1684649269101195, "learning_rate": 9.417218021691263e-06, "loss": 17.7719, "step": 9879 }, { "epoch": 0.1805984608916592, "grad_norm": 5.861741382044178, "learning_rate": 9.417079321441522e-06, "loss": 17.4348, "step": 9880 }, { "epoch": 0.18061674008810572, "grad_norm": 7.671495340190093, "learning_rate": 9.416940605710298e-06, "loss": 18.0847, "step": 9881 }, { "epoch": 0.18063501928455225, "grad_norm": 7.011991807235905, "learning_rate": 9.416801874498082e-06, "loss": 17.7374, "step": 9882 }, { "epoch": 0.1806532984809988, "grad_norm": 6.787270191127173, "learning_rate": 9.41666312780536e-06, "loss": 17.9021, "step": 9883 }, { "epoch": 0.1806715776774453, "grad_norm": 7.500985617097195, "learning_rate": 9.416524365632615e-06, "loss": 18.2551, "step": 9884 }, { "epoch": 0.18068985687389183, "grad_norm": 6.540146133715169, "learning_rate": 9.416385587980337e-06, "loss": 17.3675, "step": 9885 }, { "epoch": 0.18070813607033834, "grad_norm": 6.088448734189069, "learning_rate": 9.41624679484901e-06, "loss": 17.3463, "step": 9886 }, { "epoch": 0.18072641526678487, "grad_norm": 7.054512600951021, "learning_rate": 9.416107986239121e-06, "loss": 17.8378, "step": 9887 }, { "epoch": 0.1807446944632314, "grad_norm": 6.275529072292613, "learning_rate": 9.415969162151157e-06, "loss": 17.5825, "step": 9888 }, { "epoch": 0.18076297365967792, "grad_norm": 5.658267038377804, "learning_rate": 9.415830322585604e-06, "loss": 17.0669, "step": 9889 }, { "epoch": 0.18078125285612445, "grad_norm": 8.489460603910983, "learning_rate": 9.415691467542948e-06, "loss": 18.1464, "step": 9890 }, { "epoch": 0.18079953205257096, "grad_norm": 6.28354033344867, "learning_rate": 9.415552597023679e-06, "loss": 17.5069, "step": 9891 }, { "epoch": 0.1808178112490175, "grad_norm": 8.075302061599936, "learning_rate": 9.41541371102828e-06, "loss": 17.9517, "step": 9892 }, { "epoch": 0.180836090445464, "grad_norm": 6.797481229337563, "learning_rate": 9.41527480955724e-06, "loss": 17.6253, "step": 9893 }, { "epoch": 0.18085436964191054, "grad_norm": 6.691582855088232, "learning_rate": 9.415135892611043e-06, "loss": 17.5782, "step": 9894 }, { "epoch": 0.18087264883835708, "grad_norm": 8.017505693934982, "learning_rate": 9.414996960190179e-06, "loss": 18.2248, "step": 9895 }, { "epoch": 0.18089092803480358, "grad_norm": 8.452980092894855, "learning_rate": 9.414858012295134e-06, "loss": 18.584, "step": 9896 }, { "epoch": 0.18090920723125012, "grad_norm": 6.917700380185664, "learning_rate": 9.414719048926393e-06, "loss": 17.5312, "step": 9897 }, { "epoch": 0.18092748642769663, "grad_norm": 6.910260643563813, "learning_rate": 9.414580070084446e-06, "loss": 17.5055, "step": 9898 }, { "epoch": 0.18094576562414316, "grad_norm": 10.117433650943333, "learning_rate": 9.41444107576978e-06, "loss": 18.4476, "step": 9899 }, { "epoch": 0.1809640448205897, "grad_norm": 7.18260649555454, "learning_rate": 9.414302065982882e-06, "loss": 17.9295, "step": 9900 }, { "epoch": 0.1809823240170362, "grad_norm": 6.440786598596819, "learning_rate": 9.414163040724235e-06, "loss": 17.7439, "step": 9901 }, { "epoch": 0.18100060321348274, "grad_norm": 7.063733425870379, "learning_rate": 9.414023999994332e-06, "loss": 17.7516, "step": 9902 }, { "epoch": 0.18101888240992925, "grad_norm": 9.131458627224548, "learning_rate": 9.413884943793657e-06, "loss": 18.7895, "step": 9903 }, { "epoch": 0.18103716160637578, "grad_norm": 6.618670036736652, "learning_rate": 9.413745872122698e-06, "loss": 17.6756, "step": 9904 }, { "epoch": 0.18105544080282232, "grad_norm": 7.358234377721218, "learning_rate": 9.413606784981943e-06, "loss": 17.6505, "step": 9905 }, { "epoch": 0.18107371999926883, "grad_norm": 6.151961923717866, "learning_rate": 9.413467682371879e-06, "loss": 17.4507, "step": 9906 }, { "epoch": 0.18109199919571536, "grad_norm": 7.212741272650908, "learning_rate": 9.413328564292994e-06, "loss": 17.6875, "step": 9907 }, { "epoch": 0.18111027839216187, "grad_norm": 8.969986783784599, "learning_rate": 9.413189430745776e-06, "loss": 17.6648, "step": 9908 }, { "epoch": 0.1811285575886084, "grad_norm": 8.318671839400224, "learning_rate": 9.413050281730712e-06, "loss": 18.4372, "step": 9909 }, { "epoch": 0.18114683678505492, "grad_norm": 7.071225881915424, "learning_rate": 9.412911117248289e-06, "loss": 17.5117, "step": 9910 }, { "epoch": 0.18116511598150145, "grad_norm": 6.090548068212628, "learning_rate": 9.412771937298995e-06, "loss": 17.4232, "step": 9911 }, { "epoch": 0.181183395177948, "grad_norm": 7.119644974550266, "learning_rate": 9.412632741883319e-06, "loss": 17.7013, "step": 9912 }, { "epoch": 0.1812016743743945, "grad_norm": 7.907039076382293, "learning_rate": 9.412493531001747e-06, "loss": 18.181, "step": 9913 }, { "epoch": 0.18121995357084103, "grad_norm": 7.043056196251649, "learning_rate": 9.41235430465477e-06, "loss": 17.7467, "step": 9914 }, { "epoch": 0.18123823276728754, "grad_norm": 7.077150538842783, "learning_rate": 9.412215062842872e-06, "loss": 17.6708, "step": 9915 }, { "epoch": 0.18125651196373407, "grad_norm": 9.17201448218564, "learning_rate": 9.412075805566545e-06, "loss": 18.8287, "step": 9916 }, { "epoch": 0.1812747911601806, "grad_norm": 5.606994231286592, "learning_rate": 9.411936532826274e-06, "loss": 17.4046, "step": 9917 }, { "epoch": 0.18129307035662712, "grad_norm": 6.554508488550973, "learning_rate": 9.41179724462255e-06, "loss": 17.5461, "step": 9918 }, { "epoch": 0.18131134955307365, "grad_norm": 7.503180711274367, "learning_rate": 9.411657940955858e-06, "loss": 17.7825, "step": 9919 }, { "epoch": 0.18132962874952016, "grad_norm": 6.995120372790944, "learning_rate": 9.411518621826687e-06, "loss": 17.9555, "step": 9920 }, { "epoch": 0.1813479079459667, "grad_norm": 7.139291895107338, "learning_rate": 9.411379287235527e-06, "loss": 17.5175, "step": 9921 }, { "epoch": 0.18136618714241323, "grad_norm": 7.344403626673978, "learning_rate": 9.411239937182866e-06, "loss": 17.9815, "step": 9922 }, { "epoch": 0.18138446633885974, "grad_norm": 6.886892858166152, "learning_rate": 9.411100571669192e-06, "loss": 17.9211, "step": 9923 }, { "epoch": 0.18140274553530628, "grad_norm": 6.923457077091651, "learning_rate": 9.41096119069499e-06, "loss": 18.0434, "step": 9924 }, { "epoch": 0.18142102473175278, "grad_norm": 7.035732712622207, "learning_rate": 9.410821794260756e-06, "loss": 17.9492, "step": 9925 }, { "epoch": 0.18143930392819932, "grad_norm": 7.3410971156511415, "learning_rate": 9.410682382366973e-06, "loss": 17.9117, "step": 9926 }, { "epoch": 0.18145758312464583, "grad_norm": 7.014893919851392, "learning_rate": 9.410542955014131e-06, "loss": 17.8421, "step": 9927 }, { "epoch": 0.18147586232109236, "grad_norm": 8.667741479907686, "learning_rate": 9.410403512202718e-06, "loss": 18.7418, "step": 9928 }, { "epoch": 0.1814941415175389, "grad_norm": 7.676598377016749, "learning_rate": 9.410264053933222e-06, "loss": 17.7935, "step": 9929 }, { "epoch": 0.1815124207139854, "grad_norm": 9.793170691535114, "learning_rate": 9.410124580206136e-06, "loss": 17.8563, "step": 9930 }, { "epoch": 0.18153069991043194, "grad_norm": 6.292817351753322, "learning_rate": 9.409985091021944e-06, "loss": 17.4834, "step": 9931 }, { "epoch": 0.18154897910687845, "grad_norm": 5.9271806986499005, "learning_rate": 9.409845586381139e-06, "loss": 17.2664, "step": 9932 }, { "epoch": 0.18156725830332499, "grad_norm": 6.017048522299535, "learning_rate": 9.409706066284206e-06, "loss": 17.3449, "step": 9933 }, { "epoch": 0.18158553749977152, "grad_norm": 6.069216586223563, "learning_rate": 9.409566530731638e-06, "loss": 17.3073, "step": 9934 }, { "epoch": 0.18160381669621803, "grad_norm": 6.3845350850509615, "learning_rate": 9.409426979723919e-06, "loss": 17.4427, "step": 9935 }, { "epoch": 0.18162209589266456, "grad_norm": 5.896983928031587, "learning_rate": 9.409287413261543e-06, "loss": 17.2706, "step": 9936 }, { "epoch": 0.18164037508911107, "grad_norm": 6.096302848327695, "learning_rate": 9.409147831344997e-06, "loss": 17.468, "step": 9937 }, { "epoch": 0.1816586542855576, "grad_norm": 7.979016556832373, "learning_rate": 9.40900823397477e-06, "loss": 18.0505, "step": 9938 }, { "epoch": 0.18167693348200414, "grad_norm": 7.068405087083261, "learning_rate": 9.408868621151352e-06, "loss": 17.8445, "step": 9939 }, { "epoch": 0.18169521267845065, "grad_norm": 6.774797892285199, "learning_rate": 9.408728992875233e-06, "loss": 17.6941, "step": 9940 }, { "epoch": 0.1817134918748972, "grad_norm": 6.888916109967754, "learning_rate": 9.408589349146901e-06, "loss": 17.7118, "step": 9941 }, { "epoch": 0.1817317710713437, "grad_norm": 6.737448231143625, "learning_rate": 9.408449689966845e-06, "loss": 17.4894, "step": 9942 }, { "epoch": 0.18175005026779023, "grad_norm": 6.93402269608677, "learning_rate": 9.408310015335555e-06, "loss": 17.6713, "step": 9943 }, { "epoch": 0.18176832946423674, "grad_norm": 7.468910853899415, "learning_rate": 9.408170325253524e-06, "loss": 17.9388, "step": 9944 }, { "epoch": 0.18178660866068327, "grad_norm": 6.567975554918502, "learning_rate": 9.408030619721235e-06, "loss": 17.6042, "step": 9945 }, { "epoch": 0.1818048878571298, "grad_norm": 7.991804699293005, "learning_rate": 9.407890898739182e-06, "loss": 18.2704, "step": 9946 }, { "epoch": 0.18182316705357632, "grad_norm": 6.919307371545031, "learning_rate": 9.407751162307855e-06, "loss": 17.7989, "step": 9947 }, { "epoch": 0.18184144625002285, "grad_norm": 6.082884753677575, "learning_rate": 9.407611410427742e-06, "loss": 17.2717, "step": 9948 }, { "epoch": 0.18185972544646936, "grad_norm": 7.396876469407254, "learning_rate": 9.407471643099333e-06, "loss": 17.7669, "step": 9949 }, { "epoch": 0.1818780046429159, "grad_norm": 6.193139067930993, "learning_rate": 9.407331860323118e-06, "loss": 17.1319, "step": 9950 }, { "epoch": 0.18189628383936243, "grad_norm": 7.05041214785699, "learning_rate": 9.407192062099589e-06, "loss": 17.6034, "step": 9951 }, { "epoch": 0.18191456303580894, "grad_norm": 7.293537176686419, "learning_rate": 9.407052248429234e-06, "loss": 17.9462, "step": 9952 }, { "epoch": 0.18193284223225548, "grad_norm": 5.572292741725751, "learning_rate": 9.406912419312543e-06, "loss": 17.0839, "step": 9953 }, { "epoch": 0.18195112142870198, "grad_norm": 5.805385138851213, "learning_rate": 9.406772574750006e-06, "loss": 17.2494, "step": 9954 }, { "epoch": 0.18196940062514852, "grad_norm": 7.218236653398646, "learning_rate": 9.406632714742115e-06, "loss": 17.6228, "step": 9955 }, { "epoch": 0.18198767982159506, "grad_norm": 5.406273098967501, "learning_rate": 9.406492839289355e-06, "loss": 17.088, "step": 9956 }, { "epoch": 0.18200595901804156, "grad_norm": 8.057469465486134, "learning_rate": 9.406352948392224e-06, "loss": 18.4128, "step": 9957 }, { "epoch": 0.1820242382144881, "grad_norm": 8.223652800163661, "learning_rate": 9.406213042051207e-06, "loss": 17.7673, "step": 9958 }, { "epoch": 0.1820425174109346, "grad_norm": 6.334895683420072, "learning_rate": 9.406073120266794e-06, "loss": 17.4154, "step": 9959 }, { "epoch": 0.18206079660738114, "grad_norm": 7.323503860436332, "learning_rate": 9.405933183039479e-06, "loss": 18.0966, "step": 9960 }, { "epoch": 0.18207907580382765, "grad_norm": 6.70794441563754, "learning_rate": 9.40579323036975e-06, "loss": 17.5014, "step": 9961 }, { "epoch": 0.18209735500027419, "grad_norm": 7.085619004783449, "learning_rate": 9.405653262258097e-06, "loss": 17.9702, "step": 9962 }, { "epoch": 0.18211563419672072, "grad_norm": 7.806110273938615, "learning_rate": 9.405513278705013e-06, "loss": 18.001, "step": 9963 }, { "epoch": 0.18213391339316723, "grad_norm": 6.311410983703041, "learning_rate": 9.405373279710988e-06, "loss": 17.3548, "step": 9964 }, { "epoch": 0.18215219258961377, "grad_norm": 7.53607579749945, "learning_rate": 9.40523326527651e-06, "loss": 17.7716, "step": 9965 }, { "epoch": 0.18217047178606027, "grad_norm": 7.118059738999945, "learning_rate": 9.405093235402072e-06, "loss": 17.9583, "step": 9966 }, { "epoch": 0.1821887509825068, "grad_norm": 6.554212662623746, "learning_rate": 9.404953190088165e-06, "loss": 17.8426, "step": 9967 }, { "epoch": 0.18220703017895334, "grad_norm": 5.7344565315641916, "learning_rate": 9.40481312933528e-06, "loss": 17.0365, "step": 9968 }, { "epoch": 0.18222530937539985, "grad_norm": 7.77142609011555, "learning_rate": 9.404673053143905e-06, "loss": 17.7052, "step": 9969 }, { "epoch": 0.1822435885718464, "grad_norm": 6.707515501917969, "learning_rate": 9.404532961514536e-06, "loss": 17.8102, "step": 9970 }, { "epoch": 0.1822618677682929, "grad_norm": 5.616284311175807, "learning_rate": 9.40439285444766e-06, "loss": 17.2327, "step": 9971 }, { "epoch": 0.18228014696473943, "grad_norm": 7.549665088110754, "learning_rate": 9.404252731943768e-06, "loss": 17.9735, "step": 9972 }, { "epoch": 0.18229842616118597, "grad_norm": 8.599801538164002, "learning_rate": 9.404112594003353e-06, "loss": 18.1074, "step": 9973 }, { "epoch": 0.18231670535763247, "grad_norm": 7.606886871850189, "learning_rate": 9.403972440626907e-06, "loss": 18.0169, "step": 9974 }, { "epoch": 0.182334984554079, "grad_norm": 6.391215479650936, "learning_rate": 9.403832271814918e-06, "loss": 17.5384, "step": 9975 }, { "epoch": 0.18235326375052552, "grad_norm": 5.869188700593644, "learning_rate": 9.40369208756788e-06, "loss": 17.2025, "step": 9976 }, { "epoch": 0.18237154294697205, "grad_norm": 7.249672643884424, "learning_rate": 9.403551887886282e-06, "loss": 17.8932, "step": 9977 }, { "epoch": 0.18238982214341856, "grad_norm": 7.626645051386975, "learning_rate": 9.403411672770618e-06, "loss": 17.8717, "step": 9978 }, { "epoch": 0.1824081013398651, "grad_norm": 7.5216968414726955, "learning_rate": 9.403271442221378e-06, "loss": 18.1983, "step": 9979 }, { "epoch": 0.18242638053631163, "grad_norm": 6.94418319476342, "learning_rate": 9.403131196239053e-06, "loss": 17.8647, "step": 9980 }, { "epoch": 0.18244465973275814, "grad_norm": 7.65154381549128, "learning_rate": 9.402990934824137e-06, "loss": 17.8206, "step": 9981 }, { "epoch": 0.18246293892920468, "grad_norm": 7.73998953084367, "learning_rate": 9.402850657977119e-06, "loss": 17.909, "step": 9982 }, { "epoch": 0.18248121812565118, "grad_norm": 7.880188873447152, "learning_rate": 9.40271036569849e-06, "loss": 17.5407, "step": 9983 }, { "epoch": 0.18249949732209772, "grad_norm": 6.521102938355329, "learning_rate": 9.402570057988746e-06, "loss": 17.4695, "step": 9984 }, { "epoch": 0.18251777651854426, "grad_norm": 7.2910739138281295, "learning_rate": 9.402429734848374e-06, "loss": 17.7697, "step": 9985 }, { "epoch": 0.18253605571499076, "grad_norm": 7.031356327043205, "learning_rate": 9.402289396277869e-06, "loss": 17.8313, "step": 9986 }, { "epoch": 0.1825543349114373, "grad_norm": 7.243873441445957, "learning_rate": 9.40214904227772e-06, "loss": 17.6849, "step": 9987 }, { "epoch": 0.1825726141078838, "grad_norm": 7.303002672344109, "learning_rate": 9.402008672848422e-06, "loss": 17.6079, "step": 9988 }, { "epoch": 0.18259089330433034, "grad_norm": 5.444014869557507, "learning_rate": 9.401868287990465e-06, "loss": 17.0645, "step": 9989 }, { "epoch": 0.18260917250077688, "grad_norm": 7.635553170981905, "learning_rate": 9.401727887704341e-06, "loss": 18.2581, "step": 9990 }, { "epoch": 0.1826274516972234, "grad_norm": 6.646319730785736, "learning_rate": 9.401587471990544e-06, "loss": 17.7106, "step": 9991 }, { "epoch": 0.18264573089366992, "grad_norm": 6.3913214185636855, "learning_rate": 9.401447040849565e-06, "loss": 17.4812, "step": 9992 }, { "epoch": 0.18266401009011643, "grad_norm": 6.793394615008496, "learning_rate": 9.401306594281896e-06, "loss": 17.6085, "step": 9993 }, { "epoch": 0.18268228928656297, "grad_norm": 5.887989498671422, "learning_rate": 9.401166132288028e-06, "loss": 17.2802, "step": 9994 }, { "epoch": 0.18270056848300947, "grad_norm": 6.55002944821401, "learning_rate": 9.401025654868455e-06, "loss": 17.7171, "step": 9995 }, { "epoch": 0.182718847679456, "grad_norm": 7.108601306973718, "learning_rate": 9.40088516202367e-06, "loss": 18.119, "step": 9996 }, { "epoch": 0.18273712687590254, "grad_norm": 7.148578925418961, "learning_rate": 9.400744653754164e-06, "loss": 17.9143, "step": 9997 }, { "epoch": 0.18275540607234905, "grad_norm": 7.107843541409638, "learning_rate": 9.400604130060429e-06, "loss": 17.6363, "step": 9998 }, { "epoch": 0.1827736852687956, "grad_norm": 6.141869069173877, "learning_rate": 9.400463590942959e-06, "loss": 17.3609, "step": 9999 }, { "epoch": 0.1827919644652421, "grad_norm": 6.372164605878134, "learning_rate": 9.400323036402246e-06, "loss": 17.2801, "step": 10000 }, { "epoch": 0.18281024366168863, "grad_norm": 7.07052019562697, "learning_rate": 9.400182466438783e-06, "loss": 17.6534, "step": 10001 }, { "epoch": 0.18282852285813517, "grad_norm": 6.7555910254831915, "learning_rate": 9.400041881053062e-06, "loss": 17.7892, "step": 10002 }, { "epoch": 0.18284680205458168, "grad_norm": 5.703967614405472, "learning_rate": 9.399901280245576e-06, "loss": 17.1553, "step": 10003 }, { "epoch": 0.1828650812510282, "grad_norm": 7.350236892808791, "learning_rate": 9.399760664016817e-06, "loss": 17.7499, "step": 10004 }, { "epoch": 0.18288336044747472, "grad_norm": 9.360314148330353, "learning_rate": 9.399620032367279e-06, "loss": 18.4034, "step": 10005 }, { "epoch": 0.18290163964392125, "grad_norm": 6.742925435216869, "learning_rate": 9.399479385297456e-06, "loss": 17.5546, "step": 10006 }, { "epoch": 0.1829199188403678, "grad_norm": 7.9932538496041, "learning_rate": 9.399338722807838e-06, "loss": 18.0338, "step": 10007 }, { "epoch": 0.1829381980368143, "grad_norm": 5.844579655978202, "learning_rate": 9.39919804489892e-06, "loss": 17.2263, "step": 10008 }, { "epoch": 0.18295647723326083, "grad_norm": 7.339076586709035, "learning_rate": 9.399057351571194e-06, "loss": 17.9018, "step": 10009 }, { "epoch": 0.18297475642970734, "grad_norm": 6.562142229656559, "learning_rate": 9.398916642825155e-06, "loss": 17.3051, "step": 10010 }, { "epoch": 0.18299303562615388, "grad_norm": 5.616188088521416, "learning_rate": 9.398775918661295e-06, "loss": 17.1079, "step": 10011 }, { "epoch": 0.18301131482260038, "grad_norm": 6.6211991045661165, "learning_rate": 9.398635179080105e-06, "loss": 17.5124, "step": 10012 }, { "epoch": 0.18302959401904692, "grad_norm": 6.806000524611527, "learning_rate": 9.398494424082082e-06, "loss": 17.4438, "step": 10013 }, { "epoch": 0.18304787321549346, "grad_norm": 7.556791874915172, "learning_rate": 9.398353653667719e-06, "loss": 17.9809, "step": 10014 }, { "epoch": 0.18306615241193996, "grad_norm": 6.576924936052816, "learning_rate": 9.398212867837505e-06, "loss": 17.6573, "step": 10015 }, { "epoch": 0.1830844316083865, "grad_norm": 6.834198367818092, "learning_rate": 9.398072066591937e-06, "loss": 17.6258, "step": 10016 }, { "epoch": 0.183102710804833, "grad_norm": 6.899273350066197, "learning_rate": 9.39793124993151e-06, "loss": 17.6503, "step": 10017 }, { "epoch": 0.18312099000127954, "grad_norm": 7.19435807009442, "learning_rate": 9.397790417856714e-06, "loss": 17.7391, "step": 10018 }, { "epoch": 0.18313926919772608, "grad_norm": 5.963027513530443, "learning_rate": 9.397649570368046e-06, "loss": 17.2179, "step": 10019 }, { "epoch": 0.1831575483941726, "grad_norm": 6.606682036623378, "learning_rate": 9.397508707465997e-06, "loss": 17.327, "step": 10020 }, { "epoch": 0.18317582759061912, "grad_norm": 8.032489710799243, "learning_rate": 9.39736782915106e-06, "loss": 17.8206, "step": 10021 }, { "epoch": 0.18319410678706563, "grad_norm": 5.835923644342947, "learning_rate": 9.397226935423734e-06, "loss": 17.0689, "step": 10022 }, { "epoch": 0.18321238598351217, "grad_norm": 6.729184548387186, "learning_rate": 9.397086026284505e-06, "loss": 17.6471, "step": 10023 }, { "epoch": 0.1832306651799587, "grad_norm": 7.4706077757716285, "learning_rate": 9.396945101733874e-06, "loss": 17.8337, "step": 10024 }, { "epoch": 0.1832489443764052, "grad_norm": 9.125053382198983, "learning_rate": 9.396804161772331e-06, "loss": 17.1154, "step": 10025 }, { "epoch": 0.18326722357285175, "grad_norm": 6.067168561109368, "learning_rate": 9.396663206400372e-06, "loss": 17.4188, "step": 10026 }, { "epoch": 0.18328550276929825, "grad_norm": 5.82922530940505, "learning_rate": 9.396522235618488e-06, "loss": 17.2414, "step": 10027 }, { "epoch": 0.1833037819657448, "grad_norm": 6.557609650978757, "learning_rate": 9.396381249427176e-06, "loss": 17.5663, "step": 10028 }, { "epoch": 0.1833220611621913, "grad_norm": 7.821631448805034, "learning_rate": 9.396240247826929e-06, "loss": 17.898, "step": 10029 }, { "epoch": 0.18334034035863783, "grad_norm": 6.802719553007564, "learning_rate": 9.39609923081824e-06, "loss": 17.6314, "step": 10030 }, { "epoch": 0.18335861955508437, "grad_norm": 6.593847507280106, "learning_rate": 9.395958198401608e-06, "loss": 18.0311, "step": 10031 }, { "epoch": 0.18337689875153088, "grad_norm": 6.582581645853874, "learning_rate": 9.395817150577522e-06, "loss": 17.6809, "step": 10032 }, { "epoch": 0.1833951779479774, "grad_norm": 6.975542477321055, "learning_rate": 9.395676087346478e-06, "loss": 18.0356, "step": 10033 }, { "epoch": 0.18341345714442392, "grad_norm": 7.001343203133638, "learning_rate": 9.395535008708972e-06, "loss": 17.8048, "step": 10034 }, { "epoch": 0.18343173634087045, "grad_norm": 7.530149791963838, "learning_rate": 9.395393914665496e-06, "loss": 18.0788, "step": 10035 }, { "epoch": 0.183450015537317, "grad_norm": 11.947231797910732, "learning_rate": 9.395252805216545e-06, "loss": 18.8291, "step": 10036 }, { "epoch": 0.1834682947337635, "grad_norm": 8.45783714915619, "learning_rate": 9.395111680362616e-06, "loss": 18.9009, "step": 10037 }, { "epoch": 0.18348657393021003, "grad_norm": 6.250795698995749, "learning_rate": 9.394970540104203e-06, "loss": 17.2324, "step": 10038 }, { "epoch": 0.18350485312665654, "grad_norm": 6.887931607530826, "learning_rate": 9.394829384441796e-06, "loss": 17.6447, "step": 10039 }, { "epoch": 0.18352313232310308, "grad_norm": 6.881663556628881, "learning_rate": 9.394688213375897e-06, "loss": 17.4677, "step": 10040 }, { "epoch": 0.1835414115195496, "grad_norm": 7.094733645881841, "learning_rate": 9.394547026906996e-06, "loss": 17.7156, "step": 10041 }, { "epoch": 0.18355969071599612, "grad_norm": 6.721369910362387, "learning_rate": 9.394405825035588e-06, "loss": 17.5653, "step": 10042 }, { "epoch": 0.18357796991244266, "grad_norm": 7.905659665079018, "learning_rate": 9.394264607762171e-06, "loss": 18.1893, "step": 10043 }, { "epoch": 0.18359624910888916, "grad_norm": 8.182496878526997, "learning_rate": 9.394123375087236e-06, "loss": 18.1963, "step": 10044 }, { "epoch": 0.1836145283053357, "grad_norm": 5.924467389440648, "learning_rate": 9.39398212701128e-06, "loss": 17.3593, "step": 10045 }, { "epoch": 0.1836328075017822, "grad_norm": 7.158563543479542, "learning_rate": 9.393840863534798e-06, "loss": 17.7377, "step": 10046 }, { "epoch": 0.18365108669822874, "grad_norm": 6.6072075670565855, "learning_rate": 9.393699584658287e-06, "loss": 17.5862, "step": 10047 }, { "epoch": 0.18366936589467528, "grad_norm": 6.610929634307579, "learning_rate": 9.393558290382238e-06, "loss": 17.4893, "step": 10048 }, { "epoch": 0.1836876450911218, "grad_norm": 7.360027450851649, "learning_rate": 9.393416980707148e-06, "loss": 17.7952, "step": 10049 }, { "epoch": 0.18370592428756832, "grad_norm": 5.487437849285093, "learning_rate": 9.393275655633515e-06, "loss": 17.0889, "step": 10050 }, { "epoch": 0.18372420348401483, "grad_norm": 7.046362910788258, "learning_rate": 9.393134315161832e-06, "loss": 17.9629, "step": 10051 }, { "epoch": 0.18374248268046137, "grad_norm": 8.130850826922257, "learning_rate": 9.392992959292593e-06, "loss": 17.8732, "step": 10052 }, { "epoch": 0.1837607618769079, "grad_norm": 7.145303003288416, "learning_rate": 9.392851588026295e-06, "loss": 17.8343, "step": 10053 }, { "epoch": 0.1837790410733544, "grad_norm": 7.878470503338938, "learning_rate": 9.392710201363433e-06, "loss": 17.9847, "step": 10054 }, { "epoch": 0.18379732026980095, "grad_norm": 7.385497631069745, "learning_rate": 9.392568799304504e-06, "loss": 18.0775, "step": 10055 }, { "epoch": 0.18381559946624745, "grad_norm": 7.56836478360785, "learning_rate": 9.392427381850002e-06, "loss": 18.3344, "step": 10056 }, { "epoch": 0.183833878662694, "grad_norm": 6.030781229462143, "learning_rate": 9.392285949000422e-06, "loss": 17.0676, "step": 10057 }, { "epoch": 0.18385215785914052, "grad_norm": 6.762860666840852, "learning_rate": 9.392144500756261e-06, "loss": 17.5847, "step": 10058 }, { "epoch": 0.18387043705558703, "grad_norm": 8.958143732662874, "learning_rate": 9.392003037118018e-06, "loss": 18.4871, "step": 10059 }, { "epoch": 0.18388871625203357, "grad_norm": 7.639826919790793, "learning_rate": 9.391861558086183e-06, "loss": 18.2061, "step": 10060 }, { "epoch": 0.18390699544848008, "grad_norm": 5.734010063500707, "learning_rate": 9.391720063661253e-06, "loss": 17.0315, "step": 10061 }, { "epoch": 0.1839252746449266, "grad_norm": 8.518019074220268, "learning_rate": 9.391578553843727e-06, "loss": 17.5116, "step": 10062 }, { "epoch": 0.18394355384137312, "grad_norm": 5.843246554835414, "learning_rate": 9.3914370286341e-06, "loss": 17.3507, "step": 10063 }, { "epoch": 0.18396183303781966, "grad_norm": 5.959346547632752, "learning_rate": 9.391295488032866e-06, "loss": 17.3375, "step": 10064 }, { "epoch": 0.1839801122342662, "grad_norm": 8.566221840609426, "learning_rate": 9.391153932040524e-06, "loss": 18.4116, "step": 10065 }, { "epoch": 0.1839983914307127, "grad_norm": 7.258289236998518, "learning_rate": 9.391012360657567e-06, "loss": 17.9535, "step": 10066 }, { "epoch": 0.18401667062715923, "grad_norm": 8.74913373707712, "learning_rate": 9.390870773884493e-06, "loss": 18.2529, "step": 10067 }, { "epoch": 0.18403494982360574, "grad_norm": 6.276195917759392, "learning_rate": 9.390729171721797e-06, "loss": 17.5967, "step": 10068 }, { "epoch": 0.18405322902005228, "grad_norm": 6.043802023406444, "learning_rate": 9.390587554169978e-06, "loss": 17.4002, "step": 10069 }, { "epoch": 0.1840715082164988, "grad_norm": 7.056645386674894, "learning_rate": 9.390445921229529e-06, "loss": 17.681, "step": 10070 }, { "epoch": 0.18408978741294532, "grad_norm": 6.877355921850625, "learning_rate": 9.390304272900949e-06, "loss": 17.8717, "step": 10071 }, { "epoch": 0.18410806660939186, "grad_norm": 7.529191829145646, "learning_rate": 9.390162609184735e-06, "loss": 18.1924, "step": 10072 }, { "epoch": 0.18412634580583837, "grad_norm": 7.1874899146623905, "learning_rate": 9.390020930081378e-06, "loss": 17.9067, "step": 10073 }, { "epoch": 0.1841446250022849, "grad_norm": 4.955860626321352, "learning_rate": 9.389879235591381e-06, "loss": 16.8302, "step": 10074 }, { "epoch": 0.18416290419873144, "grad_norm": 7.089664707773817, "learning_rate": 9.38973752571524e-06, "loss": 17.7532, "step": 10075 }, { "epoch": 0.18418118339517794, "grad_norm": 6.880420490934243, "learning_rate": 9.389595800453447e-06, "loss": 17.7116, "step": 10076 }, { "epoch": 0.18419946259162448, "grad_norm": 7.391470423927875, "learning_rate": 9.389454059806502e-06, "loss": 18.0362, "step": 10077 }, { "epoch": 0.184217741788071, "grad_norm": 8.1552671824139, "learning_rate": 9.389312303774902e-06, "loss": 18.1661, "step": 10078 }, { "epoch": 0.18423602098451752, "grad_norm": 4.865258771724745, "learning_rate": 9.389170532359145e-06, "loss": 16.8387, "step": 10079 }, { "epoch": 0.18425430018096403, "grad_norm": 6.256518850905666, "learning_rate": 9.389028745559724e-06, "loss": 17.465, "step": 10080 }, { "epoch": 0.18427257937741057, "grad_norm": 7.334441424716792, "learning_rate": 9.388886943377139e-06, "loss": 17.9603, "step": 10081 }, { "epoch": 0.1842908585738571, "grad_norm": 6.080709347340776, "learning_rate": 9.388745125811884e-06, "loss": 17.4325, "step": 10082 }, { "epoch": 0.1843091377703036, "grad_norm": 7.740074002067427, "learning_rate": 9.38860329286446e-06, "loss": 17.9683, "step": 10083 }, { "epoch": 0.18432741696675015, "grad_norm": 6.758180192876033, "learning_rate": 9.388461444535364e-06, "loss": 17.5877, "step": 10084 }, { "epoch": 0.18434569616319665, "grad_norm": 7.360070317757277, "learning_rate": 9.38831958082509e-06, "loss": 18.0347, "step": 10085 }, { "epoch": 0.1843639753596432, "grad_norm": 6.233753484045297, "learning_rate": 9.388177701734135e-06, "loss": 17.5804, "step": 10086 }, { "epoch": 0.18438225455608973, "grad_norm": 7.317271295294663, "learning_rate": 9.388035807263e-06, "loss": 18.0553, "step": 10087 }, { "epoch": 0.18440053375253623, "grad_norm": 6.537674890608169, "learning_rate": 9.38789389741218e-06, "loss": 17.4096, "step": 10088 }, { "epoch": 0.18441881294898277, "grad_norm": 6.928024621313798, "learning_rate": 9.387751972182171e-06, "loss": 17.7727, "step": 10089 }, { "epoch": 0.18443709214542928, "grad_norm": 7.5708364315683, "learning_rate": 9.387610031573474e-06, "loss": 17.839, "step": 10090 }, { "epoch": 0.1844553713418758, "grad_norm": 7.908171570383844, "learning_rate": 9.387468075586583e-06, "loss": 18.1779, "step": 10091 }, { "epoch": 0.18447365053832235, "grad_norm": 5.5242661833756435, "learning_rate": 9.387326104221999e-06, "loss": 17.1945, "step": 10092 }, { "epoch": 0.18449192973476886, "grad_norm": 5.186178471195878, "learning_rate": 9.387184117480217e-06, "loss": 16.9887, "step": 10093 }, { "epoch": 0.1845102089312154, "grad_norm": 6.9561430161599755, "learning_rate": 9.387042115361735e-06, "loss": 17.9478, "step": 10094 }, { "epoch": 0.1845284881276619, "grad_norm": 7.4253635627651775, "learning_rate": 9.38690009786705e-06, "loss": 18.0151, "step": 10095 }, { "epoch": 0.18454676732410843, "grad_norm": 6.854517931928672, "learning_rate": 9.386758064996663e-06, "loss": 17.6601, "step": 10096 }, { "epoch": 0.18456504652055494, "grad_norm": 7.891688813940142, "learning_rate": 9.386616016751069e-06, "loss": 18.3511, "step": 10097 }, { "epoch": 0.18458332571700148, "grad_norm": 6.971177894420033, "learning_rate": 9.386473953130766e-06, "loss": 17.6837, "step": 10098 }, { "epoch": 0.18460160491344801, "grad_norm": 7.559625105872039, "learning_rate": 9.386331874136252e-06, "loss": 17.6225, "step": 10099 }, { "epoch": 0.18461988410989452, "grad_norm": 9.163406483025124, "learning_rate": 9.386189779768026e-06, "loss": 18.6586, "step": 10100 }, { "epoch": 0.18463816330634106, "grad_norm": 6.754811481736553, "learning_rate": 9.386047670026585e-06, "loss": 17.5206, "step": 10101 }, { "epoch": 0.18465644250278757, "grad_norm": 6.034743620294371, "learning_rate": 9.385905544912427e-06, "loss": 17.3234, "step": 10102 }, { "epoch": 0.1846747216992341, "grad_norm": 6.749802836729946, "learning_rate": 9.385763404426053e-06, "loss": 17.4607, "step": 10103 }, { "epoch": 0.18469300089568064, "grad_norm": 8.09839861060369, "learning_rate": 9.385621248567957e-06, "loss": 18.2918, "step": 10104 }, { "epoch": 0.18471128009212714, "grad_norm": 6.523424208415734, "learning_rate": 9.38547907733864e-06, "loss": 17.3888, "step": 10105 }, { "epoch": 0.18472955928857368, "grad_norm": 7.515683686348755, "learning_rate": 9.385336890738599e-06, "loss": 18.2368, "step": 10106 }, { "epoch": 0.1847478384850202, "grad_norm": 6.969248317430007, "learning_rate": 9.385194688768334e-06, "loss": 17.6281, "step": 10107 }, { "epoch": 0.18476611768146672, "grad_norm": 6.434898297464353, "learning_rate": 9.38505247142834e-06, "loss": 17.5382, "step": 10108 }, { "epoch": 0.18478439687791326, "grad_norm": 5.989770127717803, "learning_rate": 9.384910238719119e-06, "loss": 17.2119, "step": 10109 }, { "epoch": 0.18480267607435977, "grad_norm": 5.946040994438568, "learning_rate": 9.384767990641166e-06, "loss": 17.28, "step": 10110 }, { "epoch": 0.1848209552708063, "grad_norm": 7.4997105516833775, "learning_rate": 9.384625727194983e-06, "loss": 18.1866, "step": 10111 }, { "epoch": 0.1848392344672528, "grad_norm": 10.378955170882492, "learning_rate": 9.384483448381068e-06, "loss": 17.5535, "step": 10112 }, { "epoch": 0.18485751366369935, "grad_norm": 8.133534058588747, "learning_rate": 9.384341154199918e-06, "loss": 17.8558, "step": 10113 }, { "epoch": 0.18487579286014585, "grad_norm": 5.925984036602524, "learning_rate": 9.384198844652034e-06, "loss": 17.2775, "step": 10114 }, { "epoch": 0.1848940720565924, "grad_norm": 8.087471806115738, "learning_rate": 9.384056519737912e-06, "loss": 17.8861, "step": 10115 }, { "epoch": 0.18491235125303893, "grad_norm": 7.360788970198534, "learning_rate": 9.383914179458053e-06, "loss": 17.9566, "step": 10116 }, { "epoch": 0.18493063044948543, "grad_norm": 7.029999292910309, "learning_rate": 9.383771823812957e-06, "loss": 17.6375, "step": 10117 }, { "epoch": 0.18494890964593197, "grad_norm": 5.860795062387218, "learning_rate": 9.383629452803118e-06, "loss": 17.3074, "step": 10118 }, { "epoch": 0.18496718884237848, "grad_norm": 6.72733824557979, "learning_rate": 9.38348706642904e-06, "loss": 17.6756, "step": 10119 }, { "epoch": 0.184985468038825, "grad_norm": 6.348613506593034, "learning_rate": 9.38334466469122e-06, "loss": 17.251, "step": 10120 }, { "epoch": 0.18500374723527155, "grad_norm": 6.4407131573845025, "learning_rate": 9.383202247590157e-06, "loss": 17.4239, "step": 10121 }, { "epoch": 0.18502202643171806, "grad_norm": 7.66417138062931, "learning_rate": 9.38305981512635e-06, "loss": 18.2959, "step": 10122 }, { "epoch": 0.1850403056281646, "grad_norm": 8.096402083085778, "learning_rate": 9.3829173673003e-06, "loss": 18.2023, "step": 10123 }, { "epoch": 0.1850585848246111, "grad_norm": 7.441095682577782, "learning_rate": 9.382774904112505e-06, "loss": 17.9975, "step": 10124 }, { "epoch": 0.18507686402105764, "grad_norm": 5.874787975602297, "learning_rate": 9.382632425563462e-06, "loss": 17.1499, "step": 10125 }, { "epoch": 0.18509514321750417, "grad_norm": 8.396712904272395, "learning_rate": 9.382489931653675e-06, "loss": 18.1694, "step": 10126 }, { "epoch": 0.18511342241395068, "grad_norm": 6.880236844657977, "learning_rate": 9.38234742238364e-06, "loss": 17.9352, "step": 10127 }, { "epoch": 0.18513170161039721, "grad_norm": 6.891231969914025, "learning_rate": 9.38220489775386e-06, "loss": 17.673, "step": 10128 }, { "epoch": 0.18514998080684372, "grad_norm": 7.265333835185887, "learning_rate": 9.382062357764828e-06, "loss": 17.8743, "step": 10129 }, { "epoch": 0.18516826000329026, "grad_norm": 6.99979969720114, "learning_rate": 9.38191980241705e-06, "loss": 17.6382, "step": 10130 }, { "epoch": 0.18518653919973677, "grad_norm": 8.187962009688826, "learning_rate": 9.381777231711024e-06, "loss": 18.2169, "step": 10131 }, { "epoch": 0.1852048183961833, "grad_norm": 8.05353103911642, "learning_rate": 9.381634645647247e-06, "loss": 18.227, "step": 10132 }, { "epoch": 0.18522309759262984, "grad_norm": 7.505607921149841, "learning_rate": 9.38149204422622e-06, "loss": 17.8716, "step": 10133 }, { "epoch": 0.18524137678907635, "grad_norm": 6.635816173964813, "learning_rate": 9.381349427448448e-06, "loss": 17.619, "step": 10134 }, { "epoch": 0.18525965598552288, "grad_norm": 7.975934284092967, "learning_rate": 9.381206795314424e-06, "loss": 18.2861, "step": 10135 }, { "epoch": 0.1852779351819694, "grad_norm": 7.2156964395727945, "learning_rate": 9.38106414782465e-06, "loss": 17.5814, "step": 10136 }, { "epoch": 0.18529621437841592, "grad_norm": 6.413651784388292, "learning_rate": 9.380921484979626e-06, "loss": 17.7215, "step": 10137 }, { "epoch": 0.18531449357486246, "grad_norm": 6.167896859875163, "learning_rate": 9.380778806779853e-06, "loss": 17.3194, "step": 10138 }, { "epoch": 0.18533277277130897, "grad_norm": 10.13812019241804, "learning_rate": 9.38063611322583e-06, "loss": 18.9814, "step": 10139 }, { "epoch": 0.1853510519677555, "grad_norm": 6.022633303778132, "learning_rate": 9.380493404318059e-06, "loss": 17.2726, "step": 10140 }, { "epoch": 0.185369331164202, "grad_norm": 8.295729304229326, "learning_rate": 9.380350680057038e-06, "loss": 18.1683, "step": 10141 }, { "epoch": 0.18538761036064855, "grad_norm": 7.2665935336368, "learning_rate": 9.380207940443266e-06, "loss": 18.1368, "step": 10142 }, { "epoch": 0.18540588955709508, "grad_norm": 6.446637878102153, "learning_rate": 9.380065185477247e-06, "loss": 17.5363, "step": 10143 }, { "epoch": 0.1854241687535416, "grad_norm": 6.383097911432984, "learning_rate": 9.379922415159479e-06, "loss": 17.4873, "step": 10144 }, { "epoch": 0.18544244794998813, "grad_norm": 6.344580774345908, "learning_rate": 9.379779629490463e-06, "loss": 17.3647, "step": 10145 }, { "epoch": 0.18546072714643463, "grad_norm": 7.348473901289618, "learning_rate": 9.379636828470702e-06, "loss": 17.9987, "step": 10146 }, { "epoch": 0.18547900634288117, "grad_norm": 6.160850989268245, "learning_rate": 9.379494012100691e-06, "loss": 17.3402, "step": 10147 }, { "epoch": 0.18549728553932768, "grad_norm": 5.993419326775649, "learning_rate": 9.379351180380934e-06, "loss": 17.3536, "step": 10148 }, { "epoch": 0.1855155647357742, "grad_norm": 8.509094960522217, "learning_rate": 9.379208333311932e-06, "loss": 17.8672, "step": 10149 }, { "epoch": 0.18553384393222075, "grad_norm": 7.807610592514754, "learning_rate": 9.379065470894185e-06, "loss": 18.1017, "step": 10150 }, { "epoch": 0.18555212312866726, "grad_norm": 7.123159893464457, "learning_rate": 9.378922593128192e-06, "loss": 17.5521, "step": 10151 }, { "epoch": 0.1855704023251138, "grad_norm": 7.48066657787623, "learning_rate": 9.378779700014457e-06, "loss": 18.2265, "step": 10152 }, { "epoch": 0.1855886815215603, "grad_norm": 6.13410547093554, "learning_rate": 9.378636791553479e-06, "loss": 17.1917, "step": 10153 }, { "epoch": 0.18560696071800684, "grad_norm": 7.5276101799697654, "learning_rate": 9.378493867745757e-06, "loss": 17.8194, "step": 10154 }, { "epoch": 0.18562523991445337, "grad_norm": 7.534516614694993, "learning_rate": 9.378350928591795e-06, "loss": 18.3422, "step": 10155 }, { "epoch": 0.18564351911089988, "grad_norm": 7.091677494357677, "learning_rate": 9.378207974092094e-06, "loss": 17.7917, "step": 10156 }, { "epoch": 0.18566179830734642, "grad_norm": 8.561682749273809, "learning_rate": 9.378065004247154e-06, "loss": 18.1131, "step": 10157 }, { "epoch": 0.18568007750379292, "grad_norm": 5.976930234644536, "learning_rate": 9.377922019057475e-06, "loss": 17.1155, "step": 10158 }, { "epoch": 0.18569835670023946, "grad_norm": 7.4921041502776005, "learning_rate": 9.377779018523558e-06, "loss": 18.0279, "step": 10159 }, { "epoch": 0.185716635896686, "grad_norm": 7.823224316133014, "learning_rate": 9.377636002645907e-06, "loss": 17.7456, "step": 10160 }, { "epoch": 0.1857349150931325, "grad_norm": 6.3364572003429585, "learning_rate": 9.377492971425022e-06, "loss": 17.4062, "step": 10161 }, { "epoch": 0.18575319428957904, "grad_norm": 5.9663246026209995, "learning_rate": 9.377349924861404e-06, "loss": 17.2638, "step": 10162 }, { "epoch": 0.18577147348602555, "grad_norm": 6.679882467011436, "learning_rate": 9.377206862955554e-06, "loss": 17.5757, "step": 10163 }, { "epoch": 0.18578975268247208, "grad_norm": 6.940412229607728, "learning_rate": 9.377063785707974e-06, "loss": 17.6697, "step": 10164 }, { "epoch": 0.1858080318789186, "grad_norm": 6.6646624172232105, "learning_rate": 9.376920693119164e-06, "loss": 17.6532, "step": 10165 }, { "epoch": 0.18582631107536512, "grad_norm": 7.239341796820249, "learning_rate": 9.376777585189629e-06, "loss": 17.9977, "step": 10166 }, { "epoch": 0.18584459027181166, "grad_norm": 7.771468138586223, "learning_rate": 9.376634461919867e-06, "loss": 18.2488, "step": 10167 }, { "epoch": 0.18586286946825817, "grad_norm": 8.85702272829546, "learning_rate": 9.37649132331038e-06, "loss": 17.7924, "step": 10168 }, { "epoch": 0.1858811486647047, "grad_norm": 7.300670685126151, "learning_rate": 9.376348169361673e-06, "loss": 17.9292, "step": 10169 }, { "epoch": 0.1858994278611512, "grad_norm": 8.538267262281579, "learning_rate": 9.376205000074243e-06, "loss": 17.9711, "step": 10170 }, { "epoch": 0.18591770705759775, "grad_norm": 6.43279408233411, "learning_rate": 9.376061815448596e-06, "loss": 17.5059, "step": 10171 }, { "epoch": 0.18593598625404428, "grad_norm": 6.055184820090015, "learning_rate": 9.375918615485231e-06, "loss": 17.1337, "step": 10172 }, { "epoch": 0.1859542654504908, "grad_norm": 7.993831665485996, "learning_rate": 9.375775400184652e-06, "loss": 17.4964, "step": 10173 }, { "epoch": 0.18597254464693733, "grad_norm": 6.944128214910914, "learning_rate": 9.37563216954736e-06, "loss": 17.7661, "step": 10174 }, { "epoch": 0.18599082384338383, "grad_norm": 7.7628760757659725, "learning_rate": 9.375488923573857e-06, "loss": 18.064, "step": 10175 }, { "epoch": 0.18600910303983037, "grad_norm": 6.433721378115287, "learning_rate": 9.375345662264644e-06, "loss": 17.7262, "step": 10176 }, { "epoch": 0.1860273822362769, "grad_norm": 6.346395656175717, "learning_rate": 9.375202385620223e-06, "loss": 17.3542, "step": 10177 }, { "epoch": 0.1860456614327234, "grad_norm": 7.679772556378218, "learning_rate": 9.375059093641099e-06, "loss": 18.1816, "step": 10178 }, { "epoch": 0.18606394062916995, "grad_norm": 6.895906852677195, "learning_rate": 9.374915786327773e-06, "loss": 17.5511, "step": 10179 }, { "epoch": 0.18608221982561646, "grad_norm": 6.281701990782323, "learning_rate": 9.374772463680745e-06, "loss": 17.4323, "step": 10180 }, { "epoch": 0.186100499022063, "grad_norm": 7.485888610640797, "learning_rate": 9.374629125700522e-06, "loss": 17.5209, "step": 10181 }, { "epoch": 0.1861187782185095, "grad_norm": 5.6333109911431904, "learning_rate": 9.374485772387602e-06, "loss": 17.2553, "step": 10182 }, { "epoch": 0.18613705741495604, "grad_norm": 7.691203740517854, "learning_rate": 9.374342403742489e-06, "loss": 17.6799, "step": 10183 }, { "epoch": 0.18615533661140257, "grad_norm": 6.906312158650607, "learning_rate": 9.374199019765685e-06, "loss": 17.5729, "step": 10184 }, { "epoch": 0.18617361580784908, "grad_norm": 6.663267416505823, "learning_rate": 9.374055620457693e-06, "loss": 17.6142, "step": 10185 }, { "epoch": 0.18619189500429562, "grad_norm": 6.339292529984208, "learning_rate": 9.373912205819016e-06, "loss": 17.4268, "step": 10186 }, { "epoch": 0.18621017420074212, "grad_norm": 7.67804688835375, "learning_rate": 9.373768775850156e-06, "loss": 17.8058, "step": 10187 }, { "epoch": 0.18622845339718866, "grad_norm": 8.012542663727075, "learning_rate": 9.373625330551617e-06, "loss": 17.9881, "step": 10188 }, { "epoch": 0.1862467325936352, "grad_norm": 7.4263693912563635, "learning_rate": 9.3734818699239e-06, "loss": 18.0399, "step": 10189 }, { "epoch": 0.1862650117900817, "grad_norm": 9.111480253737692, "learning_rate": 9.373338393967508e-06, "loss": 18.4965, "step": 10190 }, { "epoch": 0.18628329098652824, "grad_norm": 6.5584971932398375, "learning_rate": 9.373194902682945e-06, "loss": 17.6057, "step": 10191 }, { "epoch": 0.18630157018297475, "grad_norm": 7.604951707353146, "learning_rate": 9.373051396070713e-06, "loss": 18.274, "step": 10192 }, { "epoch": 0.18631984937942128, "grad_norm": 7.375005504809956, "learning_rate": 9.372907874131316e-06, "loss": 18.1264, "step": 10193 }, { "epoch": 0.18633812857586782, "grad_norm": 7.43140604220699, "learning_rate": 9.372764336865255e-06, "loss": 17.9132, "step": 10194 }, { "epoch": 0.18635640777231433, "grad_norm": 8.193563811574354, "learning_rate": 9.372620784273036e-06, "loss": 18.137, "step": 10195 }, { "epoch": 0.18637468696876086, "grad_norm": 8.009154005190014, "learning_rate": 9.372477216355158e-06, "loss": 17.9132, "step": 10196 }, { "epoch": 0.18639296616520737, "grad_norm": 7.128934812209609, "learning_rate": 9.372333633112129e-06, "loss": 18.0419, "step": 10197 }, { "epoch": 0.1864112453616539, "grad_norm": 7.317620610851088, "learning_rate": 9.37219003454445e-06, "loss": 17.5791, "step": 10198 }, { "epoch": 0.1864295245581004, "grad_norm": 6.525025639592337, "learning_rate": 9.372046420652625e-06, "loss": 17.7318, "step": 10199 }, { "epoch": 0.18644780375454695, "grad_norm": 7.735597506460469, "learning_rate": 9.371902791437155e-06, "loss": 17.8672, "step": 10200 }, { "epoch": 0.18646608295099348, "grad_norm": 8.226678540737261, "learning_rate": 9.371759146898547e-06, "loss": 17.9677, "step": 10201 }, { "epoch": 0.18648436214744, "grad_norm": 5.501891707057994, "learning_rate": 9.371615487037302e-06, "loss": 17.2741, "step": 10202 }, { "epoch": 0.18650264134388653, "grad_norm": 6.572065461356103, "learning_rate": 9.371471811853923e-06, "loss": 17.4299, "step": 10203 }, { "epoch": 0.18652092054033304, "grad_norm": 8.720931550768686, "learning_rate": 9.371328121348914e-06, "loss": 18.4159, "step": 10204 }, { "epoch": 0.18653919973677957, "grad_norm": 6.38453974087123, "learning_rate": 9.37118441552278e-06, "loss": 17.5934, "step": 10205 }, { "epoch": 0.1865574789332261, "grad_norm": 7.3772736465978515, "learning_rate": 9.371040694376026e-06, "loss": 17.9282, "step": 10206 }, { "epoch": 0.18657575812967261, "grad_norm": 8.31169856553042, "learning_rate": 9.370896957909151e-06, "loss": 18.1827, "step": 10207 }, { "epoch": 0.18659403732611915, "grad_norm": 6.2329002713871695, "learning_rate": 9.370753206122662e-06, "loss": 17.4252, "step": 10208 }, { "epoch": 0.18661231652256566, "grad_norm": 7.739947269091635, "learning_rate": 9.370609439017064e-06, "loss": 18.0942, "step": 10209 }, { "epoch": 0.1866305957190122, "grad_norm": 6.710341963329881, "learning_rate": 9.370465656592858e-06, "loss": 17.6096, "step": 10210 }, { "epoch": 0.18664887491545873, "grad_norm": 7.315130101646613, "learning_rate": 9.37032185885055e-06, "loss": 17.9044, "step": 10211 }, { "epoch": 0.18666715411190524, "grad_norm": 6.345220118476805, "learning_rate": 9.37017804579064e-06, "loss": 17.5248, "step": 10212 }, { "epoch": 0.18668543330835177, "grad_norm": 6.268704186528599, "learning_rate": 9.370034217413638e-06, "loss": 17.661, "step": 10213 }, { "epoch": 0.18670371250479828, "grad_norm": 5.01108789980958, "learning_rate": 9.369890373720044e-06, "loss": 16.9445, "step": 10214 }, { "epoch": 0.18672199170124482, "grad_norm": 5.2239857534211245, "learning_rate": 9.369746514710365e-06, "loss": 16.9543, "step": 10215 }, { "epoch": 0.18674027089769132, "grad_norm": 6.487987836666354, "learning_rate": 9.369602640385102e-06, "loss": 17.7461, "step": 10216 }, { "epoch": 0.18675855009413786, "grad_norm": 7.865169897417005, "learning_rate": 9.369458750744762e-06, "loss": 18.0864, "step": 10217 }, { "epoch": 0.1867768292905844, "grad_norm": 6.197612176392014, "learning_rate": 9.369314845789847e-06, "loss": 17.458, "step": 10218 }, { "epoch": 0.1867951084870309, "grad_norm": 7.561435627743233, "learning_rate": 9.369170925520865e-06, "loss": 18.1334, "step": 10219 }, { "epoch": 0.18681338768347744, "grad_norm": 7.44381941931186, "learning_rate": 9.369026989938318e-06, "loss": 17.7356, "step": 10220 }, { "epoch": 0.18683166687992395, "grad_norm": 7.017214885184996, "learning_rate": 9.368883039042706e-06, "loss": 17.5028, "step": 10221 }, { "epoch": 0.18684994607637048, "grad_norm": 7.229664469044334, "learning_rate": 9.368739072834543e-06, "loss": 17.5561, "step": 10222 }, { "epoch": 0.18686822527281702, "grad_norm": 7.189737803230755, "learning_rate": 9.368595091314326e-06, "loss": 18.2759, "step": 10223 }, { "epoch": 0.18688650446926353, "grad_norm": 8.319257418022968, "learning_rate": 9.368451094482564e-06, "loss": 18.3799, "step": 10224 }, { "epoch": 0.18690478366571006, "grad_norm": 6.4889484367898005, "learning_rate": 9.368307082339758e-06, "loss": 17.5709, "step": 10225 }, { "epoch": 0.18692306286215657, "grad_norm": 12.229828869233472, "learning_rate": 9.368163054886417e-06, "loss": 17.6928, "step": 10226 }, { "epoch": 0.1869413420586031, "grad_norm": 6.322971098801259, "learning_rate": 9.368019012123042e-06, "loss": 17.4152, "step": 10227 }, { "epoch": 0.18695962125504964, "grad_norm": 6.77865355835652, "learning_rate": 9.36787495405014e-06, "loss": 17.6817, "step": 10228 }, { "epoch": 0.18697790045149615, "grad_norm": 6.426308198557916, "learning_rate": 9.367730880668214e-06, "loss": 17.4643, "step": 10229 }, { "epoch": 0.18699617964794268, "grad_norm": 5.632297905265568, "learning_rate": 9.367586791977772e-06, "loss": 17.073, "step": 10230 }, { "epoch": 0.1870144588443892, "grad_norm": 6.974509567515869, "learning_rate": 9.367442687979317e-06, "loss": 17.7247, "step": 10231 }, { "epoch": 0.18703273804083573, "grad_norm": 6.83206984158418, "learning_rate": 9.367298568673354e-06, "loss": 17.5729, "step": 10232 }, { "epoch": 0.18705101723728224, "grad_norm": 9.757596180231703, "learning_rate": 9.367154434060389e-06, "loss": 18.1063, "step": 10233 }, { "epoch": 0.18706929643372877, "grad_norm": 6.313951935010378, "learning_rate": 9.367010284140925e-06, "loss": 17.4356, "step": 10234 }, { "epoch": 0.1870875756301753, "grad_norm": 6.317780261667729, "learning_rate": 9.366866118915469e-06, "loss": 17.4707, "step": 10235 }, { "epoch": 0.18710585482662181, "grad_norm": 7.5308562210962195, "learning_rate": 9.36672193838453e-06, "loss": 18.0669, "step": 10236 }, { "epoch": 0.18712413402306835, "grad_norm": 6.156414652177409, "learning_rate": 9.366577742548606e-06, "loss": 17.4318, "step": 10237 }, { "epoch": 0.18714241321951486, "grad_norm": 7.545443507483131, "learning_rate": 9.366433531408206e-06, "loss": 17.7277, "step": 10238 }, { "epoch": 0.1871606924159614, "grad_norm": 7.1117771320861145, "learning_rate": 9.366289304963835e-06, "loss": 17.7777, "step": 10239 }, { "epoch": 0.18717897161240793, "grad_norm": 7.38012130805539, "learning_rate": 9.366145063216002e-06, "loss": 17.7226, "step": 10240 }, { "epoch": 0.18719725080885444, "grad_norm": 6.287584397312286, "learning_rate": 9.366000806165208e-06, "loss": 17.5169, "step": 10241 }, { "epoch": 0.18721553000530097, "grad_norm": 8.272480728809974, "learning_rate": 9.365856533811958e-06, "loss": 18.2174, "step": 10242 }, { "epoch": 0.18723380920174748, "grad_norm": 6.334337383323769, "learning_rate": 9.365712246156762e-06, "loss": 17.4599, "step": 10243 }, { "epoch": 0.18725208839819402, "grad_norm": 6.252816761629262, "learning_rate": 9.365567943200122e-06, "loss": 17.4577, "step": 10244 }, { "epoch": 0.18727036759464055, "grad_norm": 7.720308446724791, "learning_rate": 9.365423624942546e-06, "loss": 17.9001, "step": 10245 }, { "epoch": 0.18728864679108706, "grad_norm": 6.447776379913213, "learning_rate": 9.365279291384539e-06, "loss": 17.6432, "step": 10246 }, { "epoch": 0.1873069259875336, "grad_norm": 6.534097759809123, "learning_rate": 9.365134942526606e-06, "loss": 17.6352, "step": 10247 }, { "epoch": 0.1873252051839801, "grad_norm": 5.86259723785051, "learning_rate": 9.364990578369255e-06, "loss": 17.3302, "step": 10248 }, { "epoch": 0.18734348438042664, "grad_norm": 7.858264480931946, "learning_rate": 9.36484619891299e-06, "loss": 18.0367, "step": 10249 }, { "epoch": 0.18736176357687315, "grad_norm": 6.153744360184581, "learning_rate": 9.364701804158318e-06, "loss": 17.3333, "step": 10250 }, { "epoch": 0.18738004277331968, "grad_norm": 6.815205240338957, "learning_rate": 9.364557394105746e-06, "loss": 17.5782, "step": 10251 }, { "epoch": 0.18739832196976622, "grad_norm": 7.874192778617276, "learning_rate": 9.364412968755777e-06, "loss": 18.5423, "step": 10252 }, { "epoch": 0.18741660116621273, "grad_norm": 8.207110183282222, "learning_rate": 9.36426852810892e-06, "loss": 18.8335, "step": 10253 }, { "epoch": 0.18743488036265926, "grad_norm": 6.255530075436144, "learning_rate": 9.36412407216568e-06, "loss": 17.1486, "step": 10254 }, { "epoch": 0.18745315955910577, "grad_norm": 5.798395753238181, "learning_rate": 9.363979600926567e-06, "loss": 17.2161, "step": 10255 }, { "epoch": 0.1874714387555523, "grad_norm": 6.092057908902809, "learning_rate": 9.363835114392082e-06, "loss": 17.2954, "step": 10256 }, { "epoch": 0.18748971795199884, "grad_norm": 5.69434192164635, "learning_rate": 9.363690612562732e-06, "loss": 17.2059, "step": 10257 }, { "epoch": 0.18750799714844535, "grad_norm": 5.785054308131656, "learning_rate": 9.363546095439026e-06, "loss": 17.1083, "step": 10258 }, { "epoch": 0.18752627634489188, "grad_norm": 6.479975436775092, "learning_rate": 9.363401563021472e-06, "loss": 17.6234, "step": 10259 }, { "epoch": 0.1875445555413384, "grad_norm": 7.197517234570277, "learning_rate": 9.363257015310572e-06, "loss": 17.8155, "step": 10260 }, { "epoch": 0.18756283473778493, "grad_norm": 6.108622891651722, "learning_rate": 9.363112452306835e-06, "loss": 17.2461, "step": 10261 }, { "epoch": 0.18758111393423146, "grad_norm": 8.276899118951857, "learning_rate": 9.362967874010768e-06, "loss": 17.9235, "step": 10262 }, { "epoch": 0.18759939313067797, "grad_norm": 6.631154795403799, "learning_rate": 9.362823280422877e-06, "loss": 17.6759, "step": 10263 }, { "epoch": 0.1876176723271245, "grad_norm": 6.665722795146296, "learning_rate": 9.362678671543668e-06, "loss": 17.5608, "step": 10264 }, { "epoch": 0.18763595152357102, "grad_norm": 7.2208338930364215, "learning_rate": 9.36253404737365e-06, "loss": 17.7458, "step": 10265 }, { "epoch": 0.18765423072001755, "grad_norm": 8.015701508939888, "learning_rate": 9.362389407913327e-06, "loss": 18.339, "step": 10266 }, { "epoch": 0.18767250991646406, "grad_norm": 7.048841773166798, "learning_rate": 9.36224475316321e-06, "loss": 17.832, "step": 10267 }, { "epoch": 0.1876907891129106, "grad_norm": 6.466751494526317, "learning_rate": 9.362100083123803e-06, "loss": 17.7112, "step": 10268 }, { "epoch": 0.18770906830935713, "grad_norm": 7.476181853417334, "learning_rate": 9.361955397795613e-06, "loss": 17.9526, "step": 10269 }, { "epoch": 0.18772734750580364, "grad_norm": 7.011969367959185, "learning_rate": 9.36181069717915e-06, "loss": 17.746, "step": 10270 }, { "epoch": 0.18774562670225017, "grad_norm": 6.297938662302559, "learning_rate": 9.361665981274916e-06, "loss": 17.3984, "step": 10271 }, { "epoch": 0.18776390589869668, "grad_norm": 6.835725591588053, "learning_rate": 9.361521250083422e-06, "loss": 17.6402, "step": 10272 }, { "epoch": 0.18778218509514322, "grad_norm": 6.118580864255345, "learning_rate": 9.361376503605174e-06, "loss": 17.5137, "step": 10273 }, { "epoch": 0.18780046429158975, "grad_norm": 6.648036608556335, "learning_rate": 9.361231741840684e-06, "loss": 17.4866, "step": 10274 }, { "epoch": 0.18781874348803626, "grad_norm": 6.4771008015554035, "learning_rate": 9.361086964790452e-06, "loss": 17.3811, "step": 10275 }, { "epoch": 0.1878370226844828, "grad_norm": 6.243823701274997, "learning_rate": 9.360942172454987e-06, "loss": 17.1627, "step": 10276 }, { "epoch": 0.1878553018809293, "grad_norm": 5.83650409867925, "learning_rate": 9.360797364834799e-06, "loss": 17.3612, "step": 10277 }, { "epoch": 0.18787358107737584, "grad_norm": 8.524501459586926, "learning_rate": 9.360652541930396e-06, "loss": 18.3621, "step": 10278 }, { "epoch": 0.18789186027382238, "grad_norm": 6.137853498680438, "learning_rate": 9.360507703742285e-06, "loss": 17.3574, "step": 10279 }, { "epoch": 0.18791013947026888, "grad_norm": 7.107590055221519, "learning_rate": 9.36036285027097e-06, "loss": 17.6897, "step": 10280 }, { "epoch": 0.18792841866671542, "grad_norm": 8.289986445811634, "learning_rate": 9.360217981516963e-06, "loss": 18.3256, "step": 10281 }, { "epoch": 0.18794669786316193, "grad_norm": 8.307333242252078, "learning_rate": 9.360073097480771e-06, "loss": 17.9867, "step": 10282 }, { "epoch": 0.18796497705960846, "grad_norm": 6.639776770178107, "learning_rate": 9.3599281981629e-06, "loss": 17.4129, "step": 10283 }, { "epoch": 0.18798325625605497, "grad_norm": 9.511438673072446, "learning_rate": 9.35978328356386e-06, "loss": 18.4594, "step": 10284 }, { "epoch": 0.1880015354525015, "grad_norm": 7.389362953957318, "learning_rate": 9.359638353684157e-06, "loss": 17.4465, "step": 10285 }, { "epoch": 0.18801981464894804, "grad_norm": 7.843654781813417, "learning_rate": 9.3594934085243e-06, "loss": 18.0209, "step": 10286 }, { "epoch": 0.18803809384539455, "grad_norm": 5.828374392464251, "learning_rate": 9.359348448084798e-06, "loss": 17.119, "step": 10287 }, { "epoch": 0.18805637304184109, "grad_norm": 6.8428373551619055, "learning_rate": 9.359203472366158e-06, "loss": 17.4392, "step": 10288 }, { "epoch": 0.1880746522382876, "grad_norm": 6.622676320783083, "learning_rate": 9.359058481368888e-06, "loss": 17.6567, "step": 10289 }, { "epoch": 0.18809293143473413, "grad_norm": 6.338188570627085, "learning_rate": 9.358913475093496e-06, "loss": 17.4636, "step": 10290 }, { "epoch": 0.18811121063118066, "grad_norm": 6.943641282726379, "learning_rate": 9.35876845354049e-06, "loss": 17.5162, "step": 10291 }, { "epoch": 0.18812948982762717, "grad_norm": 7.508261262549054, "learning_rate": 9.358623416710378e-06, "loss": 18.2523, "step": 10292 }, { "epoch": 0.1881477690240737, "grad_norm": 7.615613676014433, "learning_rate": 9.35847836460367e-06, "loss": 17.8776, "step": 10293 }, { "epoch": 0.18816604822052022, "grad_norm": 7.960862017040444, "learning_rate": 9.358333297220875e-06, "loss": 18.1081, "step": 10294 }, { "epoch": 0.18818432741696675, "grad_norm": 7.710056369955521, "learning_rate": 9.358188214562499e-06, "loss": 17.9136, "step": 10295 }, { "epoch": 0.1882026066134133, "grad_norm": 7.86632430016447, "learning_rate": 9.35804311662905e-06, "loss": 18.3673, "step": 10296 }, { "epoch": 0.1882208858098598, "grad_norm": 7.047120650918058, "learning_rate": 9.35789800342104e-06, "loss": 17.8213, "step": 10297 }, { "epoch": 0.18823916500630633, "grad_norm": 8.508225806407097, "learning_rate": 9.357752874938975e-06, "loss": 18.6244, "step": 10298 }, { "epoch": 0.18825744420275284, "grad_norm": 7.954864284670036, "learning_rate": 9.357607731183362e-06, "loss": 18.0167, "step": 10299 }, { "epoch": 0.18827572339919937, "grad_norm": 7.305427095963071, "learning_rate": 9.357462572154716e-06, "loss": 17.9168, "step": 10300 }, { "epoch": 0.18829400259564588, "grad_norm": 7.117544417631457, "learning_rate": 9.35731739785354e-06, "loss": 17.4382, "step": 10301 }, { "epoch": 0.18831228179209242, "grad_norm": 7.269573153020997, "learning_rate": 9.357172208280344e-06, "loss": 17.7793, "step": 10302 }, { "epoch": 0.18833056098853895, "grad_norm": 5.5781282396621865, "learning_rate": 9.357027003435638e-06, "loss": 17.145, "step": 10303 }, { "epoch": 0.18834884018498546, "grad_norm": 8.244892639621913, "learning_rate": 9.356881783319932e-06, "loss": 17.9596, "step": 10304 }, { "epoch": 0.188367119381432, "grad_norm": 7.491096000135977, "learning_rate": 9.356736547933731e-06, "loss": 17.7012, "step": 10305 }, { "epoch": 0.1883853985778785, "grad_norm": 6.639930293503112, "learning_rate": 9.356591297277548e-06, "loss": 17.5532, "step": 10306 }, { "epoch": 0.18840367777432504, "grad_norm": 7.712491671647756, "learning_rate": 9.35644603135189e-06, "loss": 17.6495, "step": 10307 }, { "epoch": 0.18842195697077158, "grad_norm": 7.09722689162491, "learning_rate": 9.356300750157266e-06, "loss": 17.4289, "step": 10308 }, { "epoch": 0.18844023616721808, "grad_norm": 7.023241890632782, "learning_rate": 9.356155453694186e-06, "loss": 17.7111, "step": 10309 }, { "epoch": 0.18845851536366462, "grad_norm": 6.371635969895364, "learning_rate": 9.356010141963161e-06, "loss": 17.6677, "step": 10310 }, { "epoch": 0.18847679456011113, "grad_norm": 6.038358992274468, "learning_rate": 9.355864814964696e-06, "loss": 17.4605, "step": 10311 }, { "epoch": 0.18849507375655766, "grad_norm": 6.295758541209396, "learning_rate": 9.355719472699306e-06, "loss": 17.3981, "step": 10312 }, { "epoch": 0.1885133529530042, "grad_norm": 7.163153660352481, "learning_rate": 9.355574115167493e-06, "loss": 17.5929, "step": 10313 }, { "epoch": 0.1885316321494507, "grad_norm": 7.708420745067751, "learning_rate": 9.355428742369774e-06, "loss": 17.9523, "step": 10314 }, { "epoch": 0.18854991134589724, "grad_norm": 7.916856030708843, "learning_rate": 9.355283354306655e-06, "loss": 18.0848, "step": 10315 }, { "epoch": 0.18856819054234375, "grad_norm": 6.276171146796963, "learning_rate": 9.355137950978644e-06, "loss": 17.5003, "step": 10316 }, { "epoch": 0.18858646973879029, "grad_norm": 5.716743447353856, "learning_rate": 9.354992532386253e-06, "loss": 17.2313, "step": 10317 }, { "epoch": 0.1886047489352368, "grad_norm": 5.909377827740273, "learning_rate": 9.35484709852999e-06, "loss": 17.4874, "step": 10318 }, { "epoch": 0.18862302813168333, "grad_norm": 7.926833432487396, "learning_rate": 9.354701649410369e-06, "loss": 18.0493, "step": 10319 }, { "epoch": 0.18864130732812986, "grad_norm": 5.893369379778734, "learning_rate": 9.354556185027894e-06, "loss": 17.1941, "step": 10320 }, { "epoch": 0.18865958652457637, "grad_norm": 6.925840564390966, "learning_rate": 9.354410705383079e-06, "loss": 17.9839, "step": 10321 }, { "epoch": 0.1886778657210229, "grad_norm": 6.496801526721922, "learning_rate": 9.354265210476432e-06, "loss": 17.4445, "step": 10322 }, { "epoch": 0.18869614491746942, "grad_norm": 7.164697364599269, "learning_rate": 9.354119700308463e-06, "loss": 17.9656, "step": 10323 }, { "epoch": 0.18871442411391595, "grad_norm": 5.870553458649823, "learning_rate": 9.353974174879684e-06, "loss": 17.3856, "step": 10324 }, { "epoch": 0.1887327033103625, "grad_norm": 6.473687299671137, "learning_rate": 9.3538286341906e-06, "loss": 17.3945, "step": 10325 }, { "epoch": 0.188750982506809, "grad_norm": 6.800167833248619, "learning_rate": 9.353683078241726e-06, "loss": 17.5475, "step": 10326 }, { "epoch": 0.18876926170325553, "grad_norm": 8.797610668770592, "learning_rate": 9.35353750703357e-06, "loss": 18.4934, "step": 10327 }, { "epoch": 0.18878754089970204, "grad_norm": 6.9870955597911895, "learning_rate": 9.353391920566643e-06, "loss": 17.7022, "step": 10328 }, { "epoch": 0.18880582009614857, "grad_norm": 7.91011523509433, "learning_rate": 9.353246318841456e-06, "loss": 17.9763, "step": 10329 }, { "epoch": 0.1888240992925951, "grad_norm": 6.583339412986538, "learning_rate": 9.353100701858517e-06, "loss": 17.4026, "step": 10330 }, { "epoch": 0.18884237848904162, "grad_norm": 7.0968858218294795, "learning_rate": 9.35295506961834e-06, "loss": 17.9342, "step": 10331 }, { "epoch": 0.18886065768548815, "grad_norm": 7.034159509522916, "learning_rate": 9.352809422121432e-06, "loss": 17.4617, "step": 10332 }, { "epoch": 0.18887893688193466, "grad_norm": 7.634212798384977, "learning_rate": 9.352663759368303e-06, "loss": 17.9708, "step": 10333 }, { "epoch": 0.1888972160783812, "grad_norm": 7.91627024684817, "learning_rate": 9.352518081359468e-06, "loss": 18.0931, "step": 10334 }, { "epoch": 0.1889154952748277, "grad_norm": 7.879522839290091, "learning_rate": 9.352372388095435e-06, "loss": 17.8632, "step": 10335 }, { "epoch": 0.18893377447127424, "grad_norm": 5.875811968143654, "learning_rate": 9.352226679576712e-06, "loss": 17.447, "step": 10336 }, { "epoch": 0.18895205366772078, "grad_norm": 6.97015228690603, "learning_rate": 9.352080955803813e-06, "loss": 17.7629, "step": 10337 }, { "epoch": 0.18897033286416728, "grad_norm": 5.965939193317881, "learning_rate": 9.351935216777248e-06, "loss": 17.1888, "step": 10338 }, { "epoch": 0.18898861206061382, "grad_norm": 7.103798476791659, "learning_rate": 9.351789462497529e-06, "loss": 17.7767, "step": 10339 }, { "epoch": 0.18900689125706033, "grad_norm": 6.769213501368472, "learning_rate": 9.351643692965164e-06, "loss": 17.9347, "step": 10340 }, { "epoch": 0.18902517045350686, "grad_norm": 7.488585050253277, "learning_rate": 9.351497908180664e-06, "loss": 17.8217, "step": 10341 }, { "epoch": 0.1890434496499534, "grad_norm": 7.6060020362558545, "learning_rate": 9.351352108144544e-06, "loss": 18.2705, "step": 10342 }, { "epoch": 0.1890617288463999, "grad_norm": 6.013510437455529, "learning_rate": 9.351206292857312e-06, "loss": 17.2332, "step": 10343 }, { "epoch": 0.18908000804284644, "grad_norm": 7.556401584698262, "learning_rate": 9.35106046231948e-06, "loss": 18.0483, "step": 10344 }, { "epoch": 0.18909828723929295, "grad_norm": 8.603068189092529, "learning_rate": 9.350914616531557e-06, "loss": 17.7321, "step": 10345 }, { "epoch": 0.18911656643573949, "grad_norm": 13.481326330149399, "learning_rate": 9.350768755494057e-06, "loss": 17.8328, "step": 10346 }, { "epoch": 0.18913484563218602, "grad_norm": 7.0546202021714945, "learning_rate": 9.35062287920749e-06, "loss": 17.8827, "step": 10347 }, { "epoch": 0.18915312482863253, "grad_norm": 7.258429206863186, "learning_rate": 9.350476987672367e-06, "loss": 17.8897, "step": 10348 }, { "epoch": 0.18917140402507907, "grad_norm": 6.951416055469755, "learning_rate": 9.350331080889201e-06, "loss": 18.0216, "step": 10349 }, { "epoch": 0.18918968322152557, "grad_norm": 7.256919398246387, "learning_rate": 9.3501851588585e-06, "loss": 17.7268, "step": 10350 }, { "epoch": 0.1892079624179721, "grad_norm": 6.828697846997594, "learning_rate": 9.350039221580778e-06, "loss": 17.7134, "step": 10351 }, { "epoch": 0.18922624161441862, "grad_norm": 5.90178890801479, "learning_rate": 9.349893269056547e-06, "loss": 17.1515, "step": 10352 }, { "epoch": 0.18924452081086515, "grad_norm": 6.496939715605593, "learning_rate": 9.349747301286317e-06, "loss": 17.6936, "step": 10353 }, { "epoch": 0.1892628000073117, "grad_norm": 9.108829587225198, "learning_rate": 9.349601318270601e-06, "loss": 18.645, "step": 10354 }, { "epoch": 0.1892810792037582, "grad_norm": 5.86049343307796, "learning_rate": 9.349455320009907e-06, "loss": 17.3097, "step": 10355 }, { "epoch": 0.18929935840020473, "grad_norm": 6.701323732100626, "learning_rate": 9.349309306504752e-06, "loss": 17.5384, "step": 10356 }, { "epoch": 0.18931763759665124, "grad_norm": 6.497133703269843, "learning_rate": 9.349163277755646e-06, "loss": 17.5846, "step": 10357 }, { "epoch": 0.18933591679309777, "grad_norm": 6.982833709429088, "learning_rate": 9.349017233763099e-06, "loss": 17.5816, "step": 10358 }, { "epoch": 0.1893541959895443, "grad_norm": 6.928286726453096, "learning_rate": 9.348871174527622e-06, "loss": 17.7773, "step": 10359 }, { "epoch": 0.18937247518599082, "grad_norm": 5.814438366221879, "learning_rate": 9.348725100049732e-06, "loss": 17.2186, "step": 10360 }, { "epoch": 0.18939075438243735, "grad_norm": 6.791611147286944, "learning_rate": 9.348579010329938e-06, "loss": 17.5189, "step": 10361 }, { "epoch": 0.18940903357888386, "grad_norm": 7.006833346279452, "learning_rate": 9.34843290536875e-06, "loss": 17.6483, "step": 10362 }, { "epoch": 0.1894273127753304, "grad_norm": 6.7005998859096225, "learning_rate": 9.348286785166682e-06, "loss": 17.5486, "step": 10363 }, { "epoch": 0.18944559197177693, "grad_norm": 7.311681418110757, "learning_rate": 9.348140649724246e-06, "loss": 17.7656, "step": 10364 }, { "epoch": 0.18946387116822344, "grad_norm": 5.209233208298461, "learning_rate": 9.347994499041958e-06, "loss": 16.818, "step": 10365 }, { "epoch": 0.18948215036466998, "grad_norm": 8.244729645321057, "learning_rate": 9.347848333120321e-06, "loss": 18.3306, "step": 10366 }, { "epoch": 0.18950042956111648, "grad_norm": 5.912515092024369, "learning_rate": 9.347702151959856e-06, "loss": 17.4306, "step": 10367 }, { "epoch": 0.18951870875756302, "grad_norm": 7.409086396633225, "learning_rate": 9.347555955561072e-06, "loss": 17.9846, "step": 10368 }, { "epoch": 0.18953698795400953, "grad_norm": 7.55162462688707, "learning_rate": 9.347409743924483e-06, "loss": 17.9045, "step": 10369 }, { "epoch": 0.18955526715045606, "grad_norm": 6.335150015054112, "learning_rate": 9.347263517050598e-06, "loss": 17.3046, "step": 10370 }, { "epoch": 0.1895735463469026, "grad_norm": 4.602767659201406, "learning_rate": 9.347117274939933e-06, "loss": 16.7154, "step": 10371 }, { "epoch": 0.1895918255433491, "grad_norm": 6.774494471367734, "learning_rate": 9.346971017592996e-06, "loss": 17.6588, "step": 10372 }, { "epoch": 0.18961010473979564, "grad_norm": 7.728532927150962, "learning_rate": 9.346824745010306e-06, "loss": 18.0744, "step": 10373 }, { "epoch": 0.18962838393624215, "grad_norm": 5.58968017477524, "learning_rate": 9.346678457192372e-06, "loss": 17.0602, "step": 10374 }, { "epoch": 0.1896466631326887, "grad_norm": 7.543744868813387, "learning_rate": 9.346532154139707e-06, "loss": 18.1805, "step": 10375 }, { "epoch": 0.18966494232913522, "grad_norm": 7.632468889333532, "learning_rate": 9.346385835852824e-06, "loss": 17.1272, "step": 10376 }, { "epoch": 0.18968322152558173, "grad_norm": 7.269525125102864, "learning_rate": 9.346239502332234e-06, "loss": 17.8056, "step": 10377 }, { "epoch": 0.18970150072202827, "grad_norm": 7.960912859082299, "learning_rate": 9.346093153578455e-06, "loss": 18.0061, "step": 10378 }, { "epoch": 0.18971977991847477, "grad_norm": 7.9004152781134795, "learning_rate": 9.345946789591995e-06, "loss": 18.1292, "step": 10379 }, { "epoch": 0.1897380591149213, "grad_norm": 8.526333028061192, "learning_rate": 9.345800410373366e-06, "loss": 18.1318, "step": 10380 }, { "epoch": 0.18975633831136784, "grad_norm": 6.8944897455067125, "learning_rate": 9.345654015923088e-06, "loss": 17.6491, "step": 10381 }, { "epoch": 0.18977461750781435, "grad_norm": 7.406943808270386, "learning_rate": 9.345507606241668e-06, "loss": 17.9084, "step": 10382 }, { "epoch": 0.1897928967042609, "grad_norm": 6.049216269328242, "learning_rate": 9.34536118132962e-06, "loss": 17.2285, "step": 10383 }, { "epoch": 0.1898111759007074, "grad_norm": 7.43964672162737, "learning_rate": 9.345214741187461e-06, "loss": 17.5745, "step": 10384 }, { "epoch": 0.18982945509715393, "grad_norm": 8.26994894520124, "learning_rate": 9.345068285815698e-06, "loss": 18.1613, "step": 10385 }, { "epoch": 0.18984773429360044, "grad_norm": 7.707488960899155, "learning_rate": 9.34492181521485e-06, "loss": 17.9862, "step": 10386 }, { "epoch": 0.18986601349004698, "grad_norm": 16.5411776967301, "learning_rate": 9.344775329385427e-06, "loss": 17.9865, "step": 10387 }, { "epoch": 0.1898842926864935, "grad_norm": 5.718349447364829, "learning_rate": 9.344628828327944e-06, "loss": 17.2111, "step": 10388 }, { "epoch": 0.18990257188294002, "grad_norm": 5.886715887936213, "learning_rate": 9.344482312042914e-06, "loss": 17.2884, "step": 10389 }, { "epoch": 0.18992085107938655, "grad_norm": 6.4689961628351735, "learning_rate": 9.34433578053085e-06, "loss": 17.2763, "step": 10390 }, { "epoch": 0.18993913027583306, "grad_norm": 5.2140418989877615, "learning_rate": 9.344189233792265e-06, "loss": 16.9948, "step": 10391 }, { "epoch": 0.1899574094722796, "grad_norm": 6.900030581357269, "learning_rate": 9.344042671827676e-06, "loss": 17.7844, "step": 10392 }, { "epoch": 0.18997568866872613, "grad_norm": 9.860719450425975, "learning_rate": 9.343896094637593e-06, "loss": 18.0996, "step": 10393 }, { "epoch": 0.18999396786517264, "grad_norm": 6.861050770224629, "learning_rate": 9.343749502222532e-06, "loss": 17.8036, "step": 10394 }, { "epoch": 0.19001224706161918, "grad_norm": 6.806301180607863, "learning_rate": 9.343602894583004e-06, "loss": 17.6822, "step": 10395 }, { "epoch": 0.19003052625806569, "grad_norm": 6.882477940579044, "learning_rate": 9.343456271719527e-06, "loss": 17.7113, "step": 10396 }, { "epoch": 0.19004880545451222, "grad_norm": 6.843754715150214, "learning_rate": 9.34330963363261e-06, "loss": 17.2852, "step": 10397 }, { "epoch": 0.19006708465095876, "grad_norm": 7.827123227667387, "learning_rate": 9.343162980322773e-06, "loss": 18.1252, "step": 10398 }, { "epoch": 0.19008536384740526, "grad_norm": 8.605165913765676, "learning_rate": 9.343016311790525e-06, "loss": 18.6341, "step": 10399 }, { "epoch": 0.1901036430438518, "grad_norm": 6.029185270059626, "learning_rate": 9.342869628036382e-06, "loss": 17.426, "step": 10400 }, { "epoch": 0.1901219222402983, "grad_norm": 6.334705243565285, "learning_rate": 9.342722929060858e-06, "loss": 17.5421, "step": 10401 }, { "epoch": 0.19014020143674484, "grad_norm": 6.863610700531152, "learning_rate": 9.342576214864466e-06, "loss": 17.9516, "step": 10402 }, { "epoch": 0.19015848063319135, "grad_norm": 6.793845212938461, "learning_rate": 9.342429485447721e-06, "loss": 17.5948, "step": 10403 }, { "epoch": 0.1901767598296379, "grad_norm": 5.971066691459425, "learning_rate": 9.342282740811139e-06, "loss": 17.4468, "step": 10404 }, { "epoch": 0.19019503902608442, "grad_norm": 8.698390597005583, "learning_rate": 9.342135980955233e-06, "loss": 18.2342, "step": 10405 }, { "epoch": 0.19021331822253093, "grad_norm": 6.172372569569622, "learning_rate": 9.341989205880516e-06, "loss": 17.3087, "step": 10406 }, { "epoch": 0.19023159741897747, "grad_norm": 5.496960166619962, "learning_rate": 9.341842415587502e-06, "loss": 17.0624, "step": 10407 }, { "epoch": 0.19024987661542397, "grad_norm": 7.701462103673928, "learning_rate": 9.34169561007671e-06, "loss": 18.0715, "step": 10408 }, { "epoch": 0.1902681558118705, "grad_norm": 7.671296168887727, "learning_rate": 9.341548789348652e-06, "loss": 17.813, "step": 10409 }, { "epoch": 0.19028643500831705, "grad_norm": 5.881014951677098, "learning_rate": 9.34140195340384e-06, "loss": 17.299, "step": 10410 }, { "epoch": 0.19030471420476355, "grad_norm": 8.036024617783314, "learning_rate": 9.341255102242792e-06, "loss": 18.0828, "step": 10411 }, { "epoch": 0.1903229934012101, "grad_norm": 6.9130025699726065, "learning_rate": 9.34110823586602e-06, "loss": 17.6697, "step": 10412 }, { "epoch": 0.1903412725976566, "grad_norm": 8.82180752045835, "learning_rate": 9.340961354274043e-06, "loss": 18.1533, "step": 10413 }, { "epoch": 0.19035955179410313, "grad_norm": 5.968379644562009, "learning_rate": 9.34081445746737e-06, "loss": 17.1328, "step": 10414 }, { "epoch": 0.19037783099054967, "grad_norm": 7.278498339936495, "learning_rate": 9.340667545446522e-06, "loss": 18.0188, "step": 10415 }, { "epoch": 0.19039611018699618, "grad_norm": 6.124699532025761, "learning_rate": 9.34052061821201e-06, "loss": 17.4712, "step": 10416 }, { "epoch": 0.1904143893834427, "grad_norm": 6.572271076368137, "learning_rate": 9.34037367576435e-06, "loss": 17.8327, "step": 10417 }, { "epoch": 0.19043266857988922, "grad_norm": 5.6625490691218845, "learning_rate": 9.340226718104057e-06, "loss": 16.9799, "step": 10418 }, { "epoch": 0.19045094777633576, "grad_norm": 8.470132525616112, "learning_rate": 9.340079745231645e-06, "loss": 17.9597, "step": 10419 }, { "epoch": 0.19046922697278226, "grad_norm": 7.046150956276198, "learning_rate": 9.33993275714763e-06, "loss": 17.8574, "step": 10420 }, { "epoch": 0.1904875061692288, "grad_norm": 6.966526694615226, "learning_rate": 9.339785753852529e-06, "loss": 17.6667, "step": 10421 }, { "epoch": 0.19050578536567533, "grad_norm": 5.102008911949953, "learning_rate": 9.339638735346854e-06, "loss": 16.9083, "step": 10422 }, { "epoch": 0.19052406456212184, "grad_norm": 6.537757982753672, "learning_rate": 9.339491701631122e-06, "loss": 17.4962, "step": 10423 }, { "epoch": 0.19054234375856838, "grad_norm": 6.043523754574515, "learning_rate": 9.339344652705848e-06, "loss": 17.3628, "step": 10424 }, { "epoch": 0.19056062295501489, "grad_norm": 8.846123908373793, "learning_rate": 9.339197588571549e-06, "loss": 18.4887, "step": 10425 }, { "epoch": 0.19057890215146142, "grad_norm": 9.695608602744546, "learning_rate": 9.339050509228737e-06, "loss": 18.5418, "step": 10426 }, { "epoch": 0.19059718134790796, "grad_norm": 6.77165990869322, "learning_rate": 9.33890341467793e-06, "loss": 17.6323, "step": 10427 }, { "epoch": 0.19061546054435446, "grad_norm": 6.4706053546411395, "learning_rate": 9.338756304919644e-06, "loss": 17.5203, "step": 10428 }, { "epoch": 0.190633739740801, "grad_norm": 7.2279891914592245, "learning_rate": 9.338609179954393e-06, "loss": 18.0453, "step": 10429 }, { "epoch": 0.1906520189372475, "grad_norm": 5.717809797629613, "learning_rate": 9.338462039782695e-06, "loss": 17.0092, "step": 10430 }, { "epoch": 0.19067029813369404, "grad_norm": 6.453387703051551, "learning_rate": 9.33831488440506e-06, "loss": 17.5693, "step": 10431 }, { "epoch": 0.19068857733014058, "grad_norm": 6.286712840872121, "learning_rate": 9.33816771382201e-06, "loss": 17.3303, "step": 10432 }, { "epoch": 0.1907068565265871, "grad_norm": 6.518167738374248, "learning_rate": 9.33802052803406e-06, "loss": 17.5911, "step": 10433 }, { "epoch": 0.19072513572303362, "grad_norm": 6.043467175502499, "learning_rate": 9.337873327041723e-06, "loss": 17.274, "step": 10434 }, { "epoch": 0.19074341491948013, "grad_norm": 7.354373411650934, "learning_rate": 9.337726110845518e-06, "loss": 17.6185, "step": 10435 }, { "epoch": 0.19076169411592667, "grad_norm": 6.884420085630948, "learning_rate": 9.337578879445957e-06, "loss": 17.7226, "step": 10436 }, { "epoch": 0.19077997331237317, "grad_norm": 7.995802622064922, "learning_rate": 9.33743163284356e-06, "loss": 18.1389, "step": 10437 }, { "epoch": 0.1907982525088197, "grad_norm": 6.190516487906699, "learning_rate": 9.337284371038841e-06, "loss": 17.4509, "step": 10438 }, { "epoch": 0.19081653170526625, "grad_norm": 7.035449624213116, "learning_rate": 9.337137094032316e-06, "loss": 17.7969, "step": 10439 }, { "epoch": 0.19083481090171275, "grad_norm": 5.999930652722676, "learning_rate": 9.336989801824504e-06, "loss": 17.3565, "step": 10440 }, { "epoch": 0.1908530900981593, "grad_norm": 7.1151169267855705, "learning_rate": 9.336842494415916e-06, "loss": 18.1344, "step": 10441 }, { "epoch": 0.1908713692946058, "grad_norm": 6.458074269714437, "learning_rate": 9.336695171807074e-06, "loss": 17.5333, "step": 10442 }, { "epoch": 0.19088964849105233, "grad_norm": 5.981033483435007, "learning_rate": 9.33654783399849e-06, "loss": 17.2124, "step": 10443 }, { "epoch": 0.19090792768749887, "grad_norm": 6.505129219203311, "learning_rate": 9.336400480990684e-06, "loss": 17.423, "step": 10444 }, { "epoch": 0.19092620688394538, "grad_norm": 6.410413952750364, "learning_rate": 9.336253112784169e-06, "loss": 17.5055, "step": 10445 }, { "epoch": 0.1909444860803919, "grad_norm": 5.943410520611284, "learning_rate": 9.336105729379463e-06, "loss": 17.3018, "step": 10446 }, { "epoch": 0.19096276527683842, "grad_norm": 6.662730581833728, "learning_rate": 9.335958330777084e-06, "loss": 17.4771, "step": 10447 }, { "epoch": 0.19098104447328496, "grad_norm": 7.714959509225025, "learning_rate": 9.335810916977547e-06, "loss": 17.6309, "step": 10448 }, { "epoch": 0.1909993236697315, "grad_norm": 7.267940668830873, "learning_rate": 9.335663487981368e-06, "loss": 17.585, "step": 10449 }, { "epoch": 0.191017602866178, "grad_norm": 6.499461630992459, "learning_rate": 9.335516043789065e-06, "loss": 17.4037, "step": 10450 }, { "epoch": 0.19103588206262453, "grad_norm": 7.661848776562771, "learning_rate": 9.335368584401156e-06, "loss": 18.0498, "step": 10451 }, { "epoch": 0.19105416125907104, "grad_norm": 7.489698080064114, "learning_rate": 9.335221109818154e-06, "loss": 17.7291, "step": 10452 }, { "epoch": 0.19107244045551758, "grad_norm": 6.139836763169081, "learning_rate": 9.33507362004058e-06, "loss": 17.4132, "step": 10453 }, { "epoch": 0.19109071965196409, "grad_norm": 7.4440984180972904, "learning_rate": 9.334926115068949e-06, "loss": 17.8699, "step": 10454 }, { "epoch": 0.19110899884841062, "grad_norm": 6.946981728420909, "learning_rate": 9.334778594903777e-06, "loss": 17.8496, "step": 10455 }, { "epoch": 0.19112727804485716, "grad_norm": 7.214419541432831, "learning_rate": 9.334631059545583e-06, "loss": 17.9652, "step": 10456 }, { "epoch": 0.19114555724130367, "grad_norm": 6.70901587454331, "learning_rate": 9.334483508994883e-06, "loss": 17.8415, "step": 10457 }, { "epoch": 0.1911638364377502, "grad_norm": 8.014234392354286, "learning_rate": 9.334335943252196e-06, "loss": 18.1527, "step": 10458 }, { "epoch": 0.1911821156341967, "grad_norm": 6.94485658188503, "learning_rate": 9.334188362318035e-06, "loss": 17.511, "step": 10459 }, { "epoch": 0.19120039483064324, "grad_norm": 7.417707844606827, "learning_rate": 9.33404076619292e-06, "loss": 17.6998, "step": 10460 }, { "epoch": 0.19121867402708978, "grad_norm": 7.657042394726569, "learning_rate": 9.333893154877369e-06, "loss": 18.0186, "step": 10461 }, { "epoch": 0.1912369532235363, "grad_norm": 10.22892844201954, "learning_rate": 9.3337455283719e-06, "loss": 18.4338, "step": 10462 }, { "epoch": 0.19125523241998282, "grad_norm": 7.238723278249536, "learning_rate": 9.333597886677027e-06, "loss": 17.7325, "step": 10463 }, { "epoch": 0.19127351161642933, "grad_norm": 5.993710460992475, "learning_rate": 9.33345022979327e-06, "loss": 17.3588, "step": 10464 }, { "epoch": 0.19129179081287587, "grad_norm": 6.340197372919521, "learning_rate": 9.333302557721146e-06, "loss": 17.7382, "step": 10465 }, { "epoch": 0.1913100700093224, "grad_norm": 6.791912878405465, "learning_rate": 9.333154870461174e-06, "loss": 17.539, "step": 10466 }, { "epoch": 0.1913283492057689, "grad_norm": 6.060523017852661, "learning_rate": 9.333007168013868e-06, "loss": 17.3509, "step": 10467 }, { "epoch": 0.19134662840221545, "grad_norm": 7.2323114052940305, "learning_rate": 9.33285945037975e-06, "loss": 17.8316, "step": 10468 }, { "epoch": 0.19136490759866195, "grad_norm": 6.52251643048427, "learning_rate": 9.332711717559334e-06, "loss": 17.3098, "step": 10469 }, { "epoch": 0.1913831867951085, "grad_norm": 7.333715729836014, "learning_rate": 9.33256396955314e-06, "loss": 17.8015, "step": 10470 }, { "epoch": 0.191401465991555, "grad_norm": 7.244729110021288, "learning_rate": 9.332416206361686e-06, "loss": 17.6741, "step": 10471 }, { "epoch": 0.19141974518800153, "grad_norm": 7.336581266459314, "learning_rate": 9.332268427985487e-06, "loss": 17.6178, "step": 10472 }, { "epoch": 0.19143802438444807, "grad_norm": 9.518093122476557, "learning_rate": 9.332120634425067e-06, "loss": 18.0936, "step": 10473 }, { "epoch": 0.19145630358089458, "grad_norm": 5.7833375579983075, "learning_rate": 9.331972825680935e-06, "loss": 17.1807, "step": 10474 }, { "epoch": 0.1914745827773411, "grad_norm": 6.422057620330305, "learning_rate": 9.331825001753617e-06, "loss": 17.5081, "step": 10475 }, { "epoch": 0.19149286197378762, "grad_norm": 7.77793057493313, "learning_rate": 9.331677162643629e-06, "loss": 17.7322, "step": 10476 }, { "epoch": 0.19151114117023416, "grad_norm": 6.446977267695607, "learning_rate": 9.331529308351485e-06, "loss": 17.4001, "step": 10477 }, { "epoch": 0.1915294203666807, "grad_norm": 5.9778560695633125, "learning_rate": 9.33138143887771e-06, "loss": 17.3762, "step": 10478 }, { "epoch": 0.1915476995631272, "grad_norm": 6.533211496809169, "learning_rate": 9.331233554222819e-06, "loss": 17.6646, "step": 10479 }, { "epoch": 0.19156597875957374, "grad_norm": 5.958983266303413, "learning_rate": 9.331085654387328e-06, "loss": 17.2335, "step": 10480 }, { "epoch": 0.19158425795602024, "grad_norm": 6.083401883455692, "learning_rate": 9.33093773937176e-06, "loss": 17.2894, "step": 10481 }, { "epoch": 0.19160253715246678, "grad_norm": 6.732405095493683, "learning_rate": 9.33078980917663e-06, "loss": 17.529, "step": 10482 }, { "epoch": 0.19162081634891331, "grad_norm": 7.588140581672176, "learning_rate": 9.330641863802457e-06, "loss": 17.6138, "step": 10483 }, { "epoch": 0.19163909554535982, "grad_norm": 6.839579510099215, "learning_rate": 9.33049390324976e-06, "loss": 17.571, "step": 10484 }, { "epoch": 0.19165737474180636, "grad_norm": 6.41343049608932, "learning_rate": 9.330345927519057e-06, "loss": 17.846, "step": 10485 }, { "epoch": 0.19167565393825287, "grad_norm": 7.451348130479233, "learning_rate": 9.33019793661087e-06, "loss": 17.8147, "step": 10486 }, { "epoch": 0.1916939331346994, "grad_norm": 5.82472880839712, "learning_rate": 9.330049930525713e-06, "loss": 17.2141, "step": 10487 }, { "epoch": 0.1917122123311459, "grad_norm": 5.445147209150908, "learning_rate": 9.329901909264107e-06, "loss": 16.9596, "step": 10488 }, { "epoch": 0.19173049152759244, "grad_norm": 6.562132849224037, "learning_rate": 9.32975387282657e-06, "loss": 17.6303, "step": 10489 }, { "epoch": 0.19174877072403898, "grad_norm": 6.901108693152038, "learning_rate": 9.329605821213623e-06, "loss": 17.7932, "step": 10490 }, { "epoch": 0.1917670499204855, "grad_norm": 6.675714856643467, "learning_rate": 9.329457754425782e-06, "loss": 17.564, "step": 10491 }, { "epoch": 0.19178532911693202, "grad_norm": 7.517726578104453, "learning_rate": 9.329309672463567e-06, "loss": 18.0812, "step": 10492 }, { "epoch": 0.19180360831337853, "grad_norm": 7.3784629388301255, "learning_rate": 9.329161575327499e-06, "loss": 17.8641, "step": 10493 }, { "epoch": 0.19182188750982507, "grad_norm": 6.862251404933019, "learning_rate": 9.329013463018093e-06, "loss": 17.6595, "step": 10494 }, { "epoch": 0.1918401667062716, "grad_norm": 6.260407870718441, "learning_rate": 9.328865335535872e-06, "loss": 17.5309, "step": 10495 }, { "epoch": 0.1918584459027181, "grad_norm": 5.439647176748364, "learning_rate": 9.328717192881353e-06, "loss": 17.1329, "step": 10496 }, { "epoch": 0.19187672509916465, "grad_norm": 6.668790465111025, "learning_rate": 9.328569035055058e-06, "loss": 17.8241, "step": 10497 }, { "epoch": 0.19189500429561115, "grad_norm": 6.47700749270235, "learning_rate": 9.3284208620575e-06, "loss": 17.7274, "step": 10498 }, { "epoch": 0.1919132834920577, "grad_norm": 7.3122825475634485, "learning_rate": 9.328272673889206e-06, "loss": 17.7026, "step": 10499 }, { "epoch": 0.19193156268850423, "grad_norm": 7.002894709055904, "learning_rate": 9.32812447055069e-06, "loss": 17.5361, "step": 10500 }, { "epoch": 0.19194984188495073, "grad_norm": 8.460734664670872, "learning_rate": 9.327976252042474e-06, "loss": 18.4211, "step": 10501 }, { "epoch": 0.19196812108139727, "grad_norm": 6.835754991854399, "learning_rate": 9.327828018365078e-06, "loss": 17.9194, "step": 10502 }, { "epoch": 0.19198640027784378, "grad_norm": 6.5207677165144355, "learning_rate": 9.327679769519017e-06, "loss": 17.5511, "step": 10503 }, { "epoch": 0.1920046794742903, "grad_norm": 6.307911366992025, "learning_rate": 9.327531505504818e-06, "loss": 17.3657, "step": 10504 }, { "epoch": 0.19202295867073682, "grad_norm": 5.608000385527272, "learning_rate": 9.327383226322995e-06, "loss": 17.2065, "step": 10505 }, { "epoch": 0.19204123786718336, "grad_norm": 7.304296844315352, "learning_rate": 9.327234931974068e-06, "loss": 17.6759, "step": 10506 }, { "epoch": 0.1920595170636299, "grad_norm": 7.672875311084413, "learning_rate": 9.327086622458559e-06, "loss": 18.0848, "step": 10507 }, { "epoch": 0.1920777962600764, "grad_norm": 7.913357073006939, "learning_rate": 9.326938297776987e-06, "loss": 17.7198, "step": 10508 }, { "epoch": 0.19209607545652294, "grad_norm": 6.333736124268162, "learning_rate": 9.326789957929872e-06, "loss": 17.3474, "step": 10509 }, { "epoch": 0.19211435465296944, "grad_norm": 6.905903139041645, "learning_rate": 9.326641602917734e-06, "loss": 17.6535, "step": 10510 }, { "epoch": 0.19213263384941598, "grad_norm": 6.520776450934488, "learning_rate": 9.326493232741092e-06, "loss": 17.5217, "step": 10511 }, { "epoch": 0.19215091304586251, "grad_norm": 7.317714389966107, "learning_rate": 9.326344847400466e-06, "loss": 17.874, "step": 10512 }, { "epoch": 0.19216919224230902, "grad_norm": 6.8801757926825875, "learning_rate": 9.326196446896377e-06, "loss": 17.6736, "step": 10513 }, { "epoch": 0.19218747143875556, "grad_norm": 5.998845084749652, "learning_rate": 9.326048031229346e-06, "loss": 17.5381, "step": 10514 }, { "epoch": 0.19220575063520207, "grad_norm": 7.2534001313545335, "learning_rate": 9.32589960039989e-06, "loss": 17.9485, "step": 10515 }, { "epoch": 0.1922240298316486, "grad_norm": 6.1874438901416156, "learning_rate": 9.325751154408534e-06, "loss": 17.2185, "step": 10516 }, { "epoch": 0.19224230902809514, "grad_norm": 7.784618928324922, "learning_rate": 9.325602693255793e-06, "loss": 18.0031, "step": 10517 }, { "epoch": 0.19226058822454165, "grad_norm": 5.117871695439608, "learning_rate": 9.325454216942192e-06, "loss": 16.8991, "step": 10518 }, { "epoch": 0.19227886742098818, "grad_norm": 6.289985694303816, "learning_rate": 9.325305725468248e-06, "loss": 17.4479, "step": 10519 }, { "epoch": 0.1922971466174347, "grad_norm": 7.29537081876363, "learning_rate": 9.325157218834481e-06, "loss": 18.2189, "step": 10520 }, { "epoch": 0.19231542581388122, "grad_norm": 6.799227527051088, "learning_rate": 9.325008697041418e-06, "loss": 17.4002, "step": 10521 }, { "epoch": 0.19233370501032773, "grad_norm": 8.351520976907086, "learning_rate": 9.324860160089571e-06, "loss": 17.766, "step": 10522 }, { "epoch": 0.19235198420677427, "grad_norm": 7.0551663389754555, "learning_rate": 9.324711607979466e-06, "loss": 17.8016, "step": 10523 }, { "epoch": 0.1923702634032208, "grad_norm": 6.490942895032392, "learning_rate": 9.324563040711621e-06, "loss": 17.4714, "step": 10524 }, { "epoch": 0.1923885425996673, "grad_norm": 7.953588425719637, "learning_rate": 9.32441445828656e-06, "loss": 18.4641, "step": 10525 }, { "epoch": 0.19240682179611385, "grad_norm": 5.234308943229955, "learning_rate": 9.3242658607048e-06, "loss": 16.9446, "step": 10526 }, { "epoch": 0.19242510099256036, "grad_norm": 7.309660964790849, "learning_rate": 9.324117247966863e-06, "loss": 17.6846, "step": 10527 }, { "epoch": 0.1924433801890069, "grad_norm": 7.29274034818036, "learning_rate": 9.323968620073271e-06, "loss": 18.0063, "step": 10528 }, { "epoch": 0.19246165938545343, "grad_norm": 7.037351986652799, "learning_rate": 9.323819977024545e-06, "loss": 17.624, "step": 10529 }, { "epoch": 0.19247993858189993, "grad_norm": 7.479671712839552, "learning_rate": 9.323671318821203e-06, "loss": 17.892, "step": 10530 }, { "epoch": 0.19249821777834647, "grad_norm": 9.330847404690038, "learning_rate": 9.32352264546377e-06, "loss": 18.7533, "step": 10531 }, { "epoch": 0.19251649697479298, "grad_norm": 6.156017588113379, "learning_rate": 9.323373956952764e-06, "loss": 17.4061, "step": 10532 }, { "epoch": 0.1925347761712395, "grad_norm": 6.484326782442109, "learning_rate": 9.323225253288709e-06, "loss": 17.5862, "step": 10533 }, { "epoch": 0.19255305536768605, "grad_norm": 7.395482460822145, "learning_rate": 9.323076534472123e-06, "loss": 18.0998, "step": 10534 }, { "epoch": 0.19257133456413256, "grad_norm": 7.022486083857744, "learning_rate": 9.322927800503529e-06, "loss": 17.7172, "step": 10535 }, { "epoch": 0.1925896137605791, "grad_norm": 6.440153027775848, "learning_rate": 9.32277905138345e-06, "loss": 17.2067, "step": 10536 }, { "epoch": 0.1926078929570256, "grad_norm": 6.646389189065471, "learning_rate": 9.322630287112404e-06, "loss": 17.7001, "step": 10537 }, { "epoch": 0.19262617215347214, "grad_norm": 6.233991271662693, "learning_rate": 9.322481507690916e-06, "loss": 17.4111, "step": 10538 }, { "epoch": 0.19264445134991864, "grad_norm": 7.583188301422039, "learning_rate": 9.322332713119501e-06, "loss": 18.0243, "step": 10539 }, { "epoch": 0.19266273054636518, "grad_norm": 6.985848731048602, "learning_rate": 9.322183903398689e-06, "loss": 17.4681, "step": 10540 }, { "epoch": 0.19268100974281172, "grad_norm": 6.944841568800823, "learning_rate": 9.322035078528996e-06, "loss": 17.6833, "step": 10541 }, { "epoch": 0.19269928893925822, "grad_norm": 6.769110657349653, "learning_rate": 9.321886238510945e-06, "loss": 18.0905, "step": 10542 }, { "epoch": 0.19271756813570476, "grad_norm": 6.175205785648062, "learning_rate": 9.321737383345059e-06, "loss": 17.5036, "step": 10543 }, { "epoch": 0.19273584733215127, "grad_norm": 7.39292483491582, "learning_rate": 9.321588513031857e-06, "loss": 18.1339, "step": 10544 }, { "epoch": 0.1927541265285978, "grad_norm": 7.62593299399902, "learning_rate": 9.321439627571863e-06, "loss": 17.6942, "step": 10545 }, { "epoch": 0.19277240572504434, "grad_norm": 7.852047992226749, "learning_rate": 9.321290726965598e-06, "loss": 18.4223, "step": 10546 }, { "epoch": 0.19279068492149085, "grad_norm": 6.79943435019734, "learning_rate": 9.321141811213582e-06, "loss": 17.6777, "step": 10547 }, { "epoch": 0.19280896411793738, "grad_norm": 5.801317405164884, "learning_rate": 9.320992880316342e-06, "loss": 17.3783, "step": 10548 }, { "epoch": 0.1928272433143839, "grad_norm": 11.13761053265287, "learning_rate": 9.320843934274396e-06, "loss": 18.3038, "step": 10549 }, { "epoch": 0.19284552251083042, "grad_norm": 7.955616697808509, "learning_rate": 9.320694973088267e-06, "loss": 18.0079, "step": 10550 }, { "epoch": 0.19286380170727696, "grad_norm": 8.347267151769406, "learning_rate": 9.320545996758477e-06, "loss": 18.0559, "step": 10551 }, { "epoch": 0.19288208090372347, "grad_norm": 6.7086603689711115, "learning_rate": 9.320397005285548e-06, "loss": 17.5578, "step": 10552 }, { "epoch": 0.19290036010017, "grad_norm": 6.701291688077603, "learning_rate": 9.320247998670003e-06, "loss": 17.3163, "step": 10553 }, { "epoch": 0.1929186392966165, "grad_norm": 6.043578464403494, "learning_rate": 9.320098976912362e-06, "loss": 17.2764, "step": 10554 }, { "epoch": 0.19293691849306305, "grad_norm": 6.42510153394605, "learning_rate": 9.319949940013149e-06, "loss": 17.6753, "step": 10555 }, { "epoch": 0.19295519768950956, "grad_norm": 7.745886185222929, "learning_rate": 9.319800887972887e-06, "loss": 18.0879, "step": 10556 }, { "epoch": 0.1929734768859561, "grad_norm": 7.468077190633721, "learning_rate": 9.3196518207921e-06, "loss": 17.7699, "step": 10557 }, { "epoch": 0.19299175608240263, "grad_norm": 7.070802593273419, "learning_rate": 9.319502738471304e-06, "loss": 18.0692, "step": 10558 }, { "epoch": 0.19301003527884913, "grad_norm": 6.909086042391009, "learning_rate": 9.319353641011028e-06, "loss": 17.9103, "step": 10559 }, { "epoch": 0.19302831447529567, "grad_norm": 7.037366292756734, "learning_rate": 9.319204528411794e-06, "loss": 17.8153, "step": 10560 }, { "epoch": 0.19304659367174218, "grad_norm": 7.930576037493707, "learning_rate": 9.31905540067412e-06, "loss": 18.2639, "step": 10561 }, { "epoch": 0.1930648728681887, "grad_norm": 6.254364915771636, "learning_rate": 9.318906257798533e-06, "loss": 17.3226, "step": 10562 }, { "epoch": 0.19308315206463525, "grad_norm": 7.822969907801438, "learning_rate": 9.318757099785554e-06, "loss": 18.1452, "step": 10563 }, { "epoch": 0.19310143126108176, "grad_norm": 5.464234538796677, "learning_rate": 9.318607926635708e-06, "loss": 17.168, "step": 10564 }, { "epoch": 0.1931197104575283, "grad_norm": 7.450394358946171, "learning_rate": 9.318458738349514e-06, "loss": 18.4264, "step": 10565 }, { "epoch": 0.1931379896539748, "grad_norm": 6.567630052692855, "learning_rate": 9.318309534927496e-06, "loss": 17.5298, "step": 10566 }, { "epoch": 0.19315626885042134, "grad_norm": 6.996251927452832, "learning_rate": 9.31816031637018e-06, "loss": 17.6233, "step": 10567 }, { "epoch": 0.19317454804686787, "grad_norm": 7.19843337457532, "learning_rate": 9.318011082678084e-06, "loss": 17.7532, "step": 10568 }, { "epoch": 0.19319282724331438, "grad_norm": 8.16279084086477, "learning_rate": 9.317861833851737e-06, "loss": 18.0928, "step": 10569 }, { "epoch": 0.19321110643976092, "grad_norm": 6.8381809249343215, "learning_rate": 9.317712569891656e-06, "loss": 17.8484, "step": 10570 }, { "epoch": 0.19322938563620742, "grad_norm": 6.215993573824501, "learning_rate": 9.31756329079837e-06, "loss": 17.2163, "step": 10571 }, { "epoch": 0.19324766483265396, "grad_norm": 8.464180348844982, "learning_rate": 9.317413996572398e-06, "loss": 17.8885, "step": 10572 }, { "epoch": 0.19326594402910047, "grad_norm": 5.687433583149414, "learning_rate": 9.317264687214266e-06, "loss": 17.083, "step": 10573 }, { "epoch": 0.193284223225547, "grad_norm": 6.170371554072229, "learning_rate": 9.317115362724492e-06, "loss": 17.3101, "step": 10574 }, { "epoch": 0.19330250242199354, "grad_norm": 6.302181705482549, "learning_rate": 9.316966023103606e-06, "loss": 17.6045, "step": 10575 }, { "epoch": 0.19332078161844005, "grad_norm": 6.693383226821278, "learning_rate": 9.316816668352129e-06, "loss": 17.4768, "step": 10576 }, { "epoch": 0.19333906081488658, "grad_norm": 6.738883879953232, "learning_rate": 9.316667298470583e-06, "loss": 17.6461, "step": 10577 }, { "epoch": 0.1933573400113331, "grad_norm": 6.650902543790805, "learning_rate": 9.316517913459495e-06, "loss": 17.4843, "step": 10578 }, { "epoch": 0.19337561920777963, "grad_norm": 5.945529606703904, "learning_rate": 9.316368513319383e-06, "loss": 17.3006, "step": 10579 }, { "epoch": 0.19339389840422616, "grad_norm": 6.5654109763194315, "learning_rate": 9.316219098050777e-06, "loss": 17.4598, "step": 10580 }, { "epoch": 0.19341217760067267, "grad_norm": 9.49438091067944, "learning_rate": 9.316069667654196e-06, "loss": 18.5121, "step": 10581 }, { "epoch": 0.1934304567971192, "grad_norm": 7.18090129984022, "learning_rate": 9.315920222130163e-06, "loss": 17.6947, "step": 10582 }, { "epoch": 0.1934487359935657, "grad_norm": 8.277954961005245, "learning_rate": 9.315770761479209e-06, "loss": 18.0253, "step": 10583 }, { "epoch": 0.19346701519001225, "grad_norm": 9.693217376877973, "learning_rate": 9.31562128570185e-06, "loss": 18.3004, "step": 10584 }, { "epoch": 0.19348529438645878, "grad_norm": 7.771114929456253, "learning_rate": 9.315471794798614e-06, "loss": 17.9545, "step": 10585 }, { "epoch": 0.1935035735829053, "grad_norm": 6.674895642955014, "learning_rate": 9.315322288770024e-06, "loss": 17.8881, "step": 10586 }, { "epoch": 0.19352185277935183, "grad_norm": 6.407049298848421, "learning_rate": 9.315172767616602e-06, "loss": 17.1966, "step": 10587 }, { "epoch": 0.19354013197579834, "grad_norm": 6.797248276208662, "learning_rate": 9.315023231338875e-06, "loss": 17.5365, "step": 10588 }, { "epoch": 0.19355841117224487, "grad_norm": 7.642996263755199, "learning_rate": 9.314873679937366e-06, "loss": 18.1229, "step": 10589 }, { "epoch": 0.19357669036869138, "grad_norm": 6.3013328966463655, "learning_rate": 9.314724113412599e-06, "loss": 17.155, "step": 10590 }, { "epoch": 0.19359496956513791, "grad_norm": 7.847098323725367, "learning_rate": 9.3145745317651e-06, "loss": 18.1772, "step": 10591 }, { "epoch": 0.19361324876158445, "grad_norm": 7.028199647753349, "learning_rate": 9.31442493499539e-06, "loss": 17.6506, "step": 10592 }, { "epoch": 0.19363152795803096, "grad_norm": 7.597175193959776, "learning_rate": 9.314275323103994e-06, "loss": 17.9633, "step": 10593 }, { "epoch": 0.1936498071544775, "grad_norm": 7.568322940690469, "learning_rate": 9.31412569609144e-06, "loss": 18.1824, "step": 10594 }, { "epoch": 0.193668086350924, "grad_norm": 7.226247408972656, "learning_rate": 9.313976053958249e-06, "loss": 17.7692, "step": 10595 }, { "epoch": 0.19368636554737054, "grad_norm": 7.776126565740525, "learning_rate": 9.313826396704945e-06, "loss": 18.2208, "step": 10596 }, { "epoch": 0.19370464474381707, "grad_norm": 6.3933084635983, "learning_rate": 9.313676724332054e-06, "loss": 17.4391, "step": 10597 }, { "epoch": 0.19372292394026358, "grad_norm": 6.872603044415165, "learning_rate": 9.313527036840103e-06, "loss": 17.7786, "step": 10598 }, { "epoch": 0.19374120313671012, "grad_norm": 6.527531981879228, "learning_rate": 9.31337733422961e-06, "loss": 17.4212, "step": 10599 }, { "epoch": 0.19375948233315662, "grad_norm": 6.301659271412311, "learning_rate": 9.313227616501106e-06, "loss": 17.3434, "step": 10600 }, { "epoch": 0.19377776152960316, "grad_norm": 7.712829624281709, "learning_rate": 9.313077883655112e-06, "loss": 18.0235, "step": 10601 }, { "epoch": 0.1937960407260497, "grad_norm": 6.327685086474394, "learning_rate": 9.312928135692156e-06, "loss": 17.269, "step": 10602 }, { "epoch": 0.1938143199224962, "grad_norm": 7.151033400204553, "learning_rate": 9.312778372612761e-06, "loss": 17.8822, "step": 10603 }, { "epoch": 0.19383259911894274, "grad_norm": 5.65434615832553, "learning_rate": 9.312628594417452e-06, "loss": 16.9864, "step": 10604 }, { "epoch": 0.19385087831538925, "grad_norm": 7.526213554422045, "learning_rate": 9.312478801106754e-06, "loss": 18.1226, "step": 10605 }, { "epoch": 0.19386915751183578, "grad_norm": 7.449896069951539, "learning_rate": 9.312328992681191e-06, "loss": 18.0462, "step": 10606 }, { "epoch": 0.1938874367082823, "grad_norm": 7.313314532439838, "learning_rate": 9.312179169141292e-06, "loss": 17.7867, "step": 10607 }, { "epoch": 0.19390571590472883, "grad_norm": 7.535135216513795, "learning_rate": 9.312029330487576e-06, "loss": 17.8511, "step": 10608 }, { "epoch": 0.19392399510117536, "grad_norm": 7.6273741504347985, "learning_rate": 9.311879476720572e-06, "loss": 18.0612, "step": 10609 }, { "epoch": 0.19394227429762187, "grad_norm": 7.3073291992089615, "learning_rate": 9.311729607840804e-06, "loss": 17.8439, "step": 10610 }, { "epoch": 0.1939605534940684, "grad_norm": 9.447661877803624, "learning_rate": 9.3115797238488e-06, "loss": 18.8587, "step": 10611 }, { "epoch": 0.1939788326905149, "grad_norm": 6.56812485016018, "learning_rate": 9.311429824745082e-06, "loss": 17.6294, "step": 10612 }, { "epoch": 0.19399711188696145, "grad_norm": 6.583498507950827, "learning_rate": 9.311279910530177e-06, "loss": 17.5195, "step": 10613 }, { "epoch": 0.19401539108340798, "grad_norm": 6.368004010018813, "learning_rate": 9.31112998120461e-06, "loss": 17.4466, "step": 10614 }, { "epoch": 0.1940336702798545, "grad_norm": 6.474764642472075, "learning_rate": 9.310980036768905e-06, "loss": 17.5982, "step": 10615 }, { "epoch": 0.19405194947630103, "grad_norm": 6.380601550493955, "learning_rate": 9.310830077223592e-06, "loss": 17.4736, "step": 10616 }, { "epoch": 0.19407022867274754, "grad_norm": 7.878257561781664, "learning_rate": 9.310680102569192e-06, "loss": 17.8663, "step": 10617 }, { "epoch": 0.19408850786919407, "grad_norm": 7.34069814682072, "learning_rate": 9.310530112806232e-06, "loss": 17.8274, "step": 10618 }, { "epoch": 0.1941067870656406, "grad_norm": 8.218669835162492, "learning_rate": 9.310380107935238e-06, "loss": 17.9432, "step": 10619 }, { "epoch": 0.19412506626208711, "grad_norm": 7.3994748891972675, "learning_rate": 9.310230087956736e-06, "loss": 18.1808, "step": 10620 }, { "epoch": 0.19414334545853365, "grad_norm": 5.658178055925301, "learning_rate": 9.310080052871252e-06, "loss": 17.1895, "step": 10621 }, { "epoch": 0.19416162465498016, "grad_norm": 9.654160263039296, "learning_rate": 9.30993000267931e-06, "loss": 18.5662, "step": 10622 }, { "epoch": 0.1941799038514267, "grad_norm": 6.784352251926139, "learning_rate": 9.30977993738144e-06, "loss": 17.7657, "step": 10623 }, { "epoch": 0.1941981830478732, "grad_norm": 7.395272450714351, "learning_rate": 9.309629856978163e-06, "loss": 17.5341, "step": 10624 }, { "epoch": 0.19421646224431974, "grad_norm": 6.801140426252479, "learning_rate": 9.309479761470008e-06, "loss": 17.6007, "step": 10625 }, { "epoch": 0.19423474144076627, "grad_norm": 6.93117260309201, "learning_rate": 9.309329650857501e-06, "loss": 17.8933, "step": 10626 }, { "epoch": 0.19425302063721278, "grad_norm": 9.095551821382614, "learning_rate": 9.309179525141167e-06, "loss": 18.7848, "step": 10627 }, { "epoch": 0.19427129983365932, "grad_norm": 8.740240869200871, "learning_rate": 9.309029384321533e-06, "loss": 18.1465, "step": 10628 }, { "epoch": 0.19428957903010582, "grad_norm": 6.321263675952195, "learning_rate": 9.308879228399123e-06, "loss": 17.6637, "step": 10629 }, { "epoch": 0.19430785822655236, "grad_norm": 7.488813766017334, "learning_rate": 9.308729057374468e-06, "loss": 18.1155, "step": 10630 }, { "epoch": 0.1943261374229989, "grad_norm": 6.80232486213669, "learning_rate": 9.30857887124809e-06, "loss": 17.3481, "step": 10631 }, { "epoch": 0.1943444166194454, "grad_norm": 6.272396843005326, "learning_rate": 9.308428670020517e-06, "loss": 17.3226, "step": 10632 }, { "epoch": 0.19436269581589194, "grad_norm": 11.094605897917983, "learning_rate": 9.308278453692277e-06, "loss": 18.3589, "step": 10633 }, { "epoch": 0.19438097501233845, "grad_norm": 9.03773670620496, "learning_rate": 9.308128222263893e-06, "loss": 18.6966, "step": 10634 }, { "epoch": 0.19439925420878498, "grad_norm": 6.290513129643883, "learning_rate": 9.307977975735894e-06, "loss": 17.4824, "step": 10635 }, { "epoch": 0.19441753340523152, "grad_norm": 6.140177837419432, "learning_rate": 9.307827714108805e-06, "loss": 17.5921, "step": 10636 }, { "epoch": 0.19443581260167803, "grad_norm": 6.344802791878528, "learning_rate": 9.307677437383156e-06, "loss": 17.4377, "step": 10637 }, { "epoch": 0.19445409179812456, "grad_norm": 7.6818635390571, "learning_rate": 9.30752714555947e-06, "loss": 17.8808, "step": 10638 }, { "epoch": 0.19447237099457107, "grad_norm": 7.467567891909981, "learning_rate": 9.307376838638274e-06, "loss": 17.9963, "step": 10639 }, { "epoch": 0.1944906501910176, "grad_norm": 7.601758098061826, "learning_rate": 9.307226516620096e-06, "loss": 18.4309, "step": 10640 }, { "epoch": 0.1945089293874641, "grad_norm": 6.949781637654198, "learning_rate": 9.307076179505466e-06, "loss": 17.7668, "step": 10641 }, { "epoch": 0.19452720858391065, "grad_norm": 6.344390623967537, "learning_rate": 9.306925827294905e-06, "loss": 17.6243, "step": 10642 }, { "epoch": 0.19454548778035718, "grad_norm": 6.121671076286463, "learning_rate": 9.306775459988944e-06, "loss": 17.3048, "step": 10643 }, { "epoch": 0.1945637669768037, "grad_norm": 6.634782358505514, "learning_rate": 9.306625077588108e-06, "loss": 17.1535, "step": 10644 }, { "epoch": 0.19458204617325023, "grad_norm": 7.101410142798328, "learning_rate": 9.306474680092925e-06, "loss": 17.5748, "step": 10645 }, { "epoch": 0.19460032536969674, "grad_norm": 7.109025070919761, "learning_rate": 9.306324267503921e-06, "loss": 17.6979, "step": 10646 }, { "epoch": 0.19461860456614327, "grad_norm": 7.028477675482064, "learning_rate": 9.306173839821626e-06, "loss": 17.587, "step": 10647 }, { "epoch": 0.1946368837625898, "grad_norm": 6.6055027926956145, "learning_rate": 9.306023397046564e-06, "loss": 17.51, "step": 10648 }, { "epoch": 0.19465516295903632, "grad_norm": 8.327068170217741, "learning_rate": 9.305872939179263e-06, "loss": 18.2495, "step": 10649 }, { "epoch": 0.19467344215548285, "grad_norm": 6.141988982849482, "learning_rate": 9.305722466220253e-06, "loss": 17.316, "step": 10650 }, { "epoch": 0.19469172135192936, "grad_norm": 5.932637633055441, "learning_rate": 9.305571978170058e-06, "loss": 17.3231, "step": 10651 }, { "epoch": 0.1947100005483759, "grad_norm": 7.503772590917745, "learning_rate": 9.305421475029209e-06, "loss": 17.8163, "step": 10652 }, { "epoch": 0.19472827974482243, "grad_norm": 6.695506480582686, "learning_rate": 9.30527095679823e-06, "loss": 17.4673, "step": 10653 }, { "epoch": 0.19474655894126894, "grad_norm": 6.662264358726758, "learning_rate": 9.305120423477647e-06, "loss": 17.9, "step": 10654 }, { "epoch": 0.19476483813771547, "grad_norm": 6.896771157978884, "learning_rate": 9.304969875067995e-06, "loss": 17.7757, "step": 10655 }, { "epoch": 0.19478311733416198, "grad_norm": 6.460014333683582, "learning_rate": 9.304819311569794e-06, "loss": 17.4464, "step": 10656 }, { "epoch": 0.19480139653060852, "grad_norm": 6.586314892185982, "learning_rate": 9.304668732983576e-06, "loss": 17.4563, "step": 10657 }, { "epoch": 0.19481967572705503, "grad_norm": 7.077086928979827, "learning_rate": 9.304518139309869e-06, "loss": 17.83, "step": 10658 }, { "epoch": 0.19483795492350156, "grad_norm": 6.568784637257991, "learning_rate": 9.304367530549197e-06, "loss": 17.7367, "step": 10659 }, { "epoch": 0.1948562341199481, "grad_norm": 8.723658430859082, "learning_rate": 9.304216906702092e-06, "loss": 17.9172, "step": 10660 }, { "epoch": 0.1948745133163946, "grad_norm": 7.5472534182362745, "learning_rate": 9.304066267769078e-06, "loss": 17.334, "step": 10661 }, { "epoch": 0.19489279251284114, "grad_norm": 7.6130460575294965, "learning_rate": 9.303915613750689e-06, "loss": 17.8953, "step": 10662 }, { "epoch": 0.19491107170928765, "grad_norm": 6.632862241041892, "learning_rate": 9.303764944647447e-06, "loss": 17.3226, "step": 10663 }, { "epoch": 0.19492935090573418, "grad_norm": 6.255270723640296, "learning_rate": 9.303614260459882e-06, "loss": 17.2681, "step": 10664 }, { "epoch": 0.19494763010218072, "grad_norm": 6.770546445817632, "learning_rate": 9.303463561188522e-06, "loss": 17.4723, "step": 10665 }, { "epoch": 0.19496590929862723, "grad_norm": 6.307885423130489, "learning_rate": 9.303312846833897e-06, "loss": 17.2604, "step": 10666 }, { "epoch": 0.19498418849507376, "grad_norm": 6.301987745633895, "learning_rate": 9.303162117396534e-06, "loss": 17.5328, "step": 10667 }, { "epoch": 0.19500246769152027, "grad_norm": 5.773298416189256, "learning_rate": 9.30301137287696e-06, "loss": 17.0753, "step": 10668 }, { "epoch": 0.1950207468879668, "grad_norm": 8.46297537772062, "learning_rate": 9.302860613275705e-06, "loss": 18.3674, "step": 10669 }, { "epoch": 0.19503902608441334, "grad_norm": 6.870016457950988, "learning_rate": 9.302709838593299e-06, "loss": 17.4719, "step": 10670 }, { "epoch": 0.19505730528085985, "grad_norm": 6.93741752124257, "learning_rate": 9.302559048830266e-06, "loss": 17.7941, "step": 10671 }, { "epoch": 0.19507558447730639, "grad_norm": 6.897580639313483, "learning_rate": 9.302408243987137e-06, "loss": 17.8397, "step": 10672 }, { "epoch": 0.1950938636737529, "grad_norm": 7.6283759654674625, "learning_rate": 9.302257424064441e-06, "loss": 17.693, "step": 10673 }, { "epoch": 0.19511214287019943, "grad_norm": 9.701676756354317, "learning_rate": 9.302106589062705e-06, "loss": 18.6908, "step": 10674 }, { "epoch": 0.19513042206664594, "grad_norm": 6.684327878630932, "learning_rate": 9.30195573898246e-06, "loss": 17.3524, "step": 10675 }, { "epoch": 0.19514870126309247, "grad_norm": 8.020973963191935, "learning_rate": 9.301804873824234e-06, "loss": 17.8621, "step": 10676 }, { "epoch": 0.195166980459539, "grad_norm": 5.992558588825945, "learning_rate": 9.301653993588554e-06, "loss": 17.4288, "step": 10677 }, { "epoch": 0.19518525965598552, "grad_norm": 6.253317221883683, "learning_rate": 9.30150309827595e-06, "loss": 17.5206, "step": 10678 }, { "epoch": 0.19520353885243205, "grad_norm": 6.709397417090899, "learning_rate": 9.301352187886952e-06, "loss": 17.6034, "step": 10679 }, { "epoch": 0.19522181804887856, "grad_norm": 6.5657654728391535, "learning_rate": 9.301201262422086e-06, "loss": 17.66, "step": 10680 }, { "epoch": 0.1952400972453251, "grad_norm": 8.283829826030313, "learning_rate": 9.301050321881884e-06, "loss": 18.0831, "step": 10681 }, { "epoch": 0.19525837644177163, "grad_norm": 7.298913795471017, "learning_rate": 9.300899366266875e-06, "loss": 17.2761, "step": 10682 }, { "epoch": 0.19527665563821814, "grad_norm": 9.970929517780979, "learning_rate": 9.300748395577585e-06, "loss": 18.5915, "step": 10683 }, { "epoch": 0.19529493483466467, "grad_norm": 7.506747819785145, "learning_rate": 9.300597409814546e-06, "loss": 17.8649, "step": 10684 }, { "epoch": 0.19531321403111118, "grad_norm": 7.590362488356904, "learning_rate": 9.300446408978285e-06, "loss": 17.9497, "step": 10685 }, { "epoch": 0.19533149322755772, "grad_norm": 7.031810394509858, "learning_rate": 9.300295393069333e-06, "loss": 17.7331, "step": 10686 }, { "epoch": 0.19534977242400425, "grad_norm": 6.029242218241611, "learning_rate": 9.300144362088221e-06, "loss": 17.5721, "step": 10687 }, { "epoch": 0.19536805162045076, "grad_norm": 5.882413409238314, "learning_rate": 9.299993316035474e-06, "loss": 17.5585, "step": 10688 }, { "epoch": 0.1953863308168973, "grad_norm": 6.850432216812345, "learning_rate": 9.299842254911625e-06, "loss": 17.7273, "step": 10689 }, { "epoch": 0.1954046100133438, "grad_norm": 5.193430594848009, "learning_rate": 9.2996911787172e-06, "loss": 17.0182, "step": 10690 }, { "epoch": 0.19542288920979034, "grad_norm": 6.219183428513139, "learning_rate": 9.299540087452732e-06, "loss": 17.4285, "step": 10691 }, { "epoch": 0.19544116840623685, "grad_norm": 6.628865620959073, "learning_rate": 9.299388981118749e-06, "loss": 17.6518, "step": 10692 }, { "epoch": 0.19545944760268338, "grad_norm": 6.177767982169759, "learning_rate": 9.29923785971578e-06, "loss": 17.3978, "step": 10693 }, { "epoch": 0.19547772679912992, "grad_norm": 7.3571905538615, "learning_rate": 9.299086723244358e-06, "loss": 17.9955, "step": 10694 }, { "epoch": 0.19549600599557643, "grad_norm": 7.729865331947313, "learning_rate": 9.298935571705005e-06, "loss": 17.8947, "step": 10695 }, { "epoch": 0.19551428519202296, "grad_norm": 6.118071552231917, "learning_rate": 9.29878440509826e-06, "loss": 17.1034, "step": 10696 }, { "epoch": 0.19553256438846947, "grad_norm": 5.95000903021858, "learning_rate": 9.298633223424647e-06, "loss": 17.2047, "step": 10697 }, { "epoch": 0.195550843584916, "grad_norm": 6.843863320766885, "learning_rate": 9.298482026684699e-06, "loss": 17.8315, "step": 10698 }, { "epoch": 0.19556912278136254, "grad_norm": 6.487031716869424, "learning_rate": 9.298330814878944e-06, "loss": 17.5716, "step": 10699 }, { "epoch": 0.19558740197780905, "grad_norm": 7.355481764198525, "learning_rate": 9.298179588007912e-06, "loss": 17.663, "step": 10700 }, { "epoch": 0.19560568117425559, "grad_norm": 6.468301288869318, "learning_rate": 9.298028346072132e-06, "loss": 17.4242, "step": 10701 }, { "epoch": 0.1956239603707021, "grad_norm": 7.243010837289347, "learning_rate": 9.297877089072138e-06, "loss": 18.0509, "step": 10702 }, { "epoch": 0.19564223956714863, "grad_norm": 7.9735117004437805, "learning_rate": 9.297725817008458e-06, "loss": 17.4712, "step": 10703 }, { "epoch": 0.19566051876359516, "grad_norm": 6.844671694673614, "learning_rate": 9.297574529881619e-06, "loss": 17.5793, "step": 10704 }, { "epoch": 0.19567879796004167, "grad_norm": 6.642715710082763, "learning_rate": 9.297423227692158e-06, "loss": 17.3391, "step": 10705 }, { "epoch": 0.1956970771564882, "grad_norm": 7.867678656114496, "learning_rate": 9.297271910440598e-06, "loss": 18.1073, "step": 10706 }, { "epoch": 0.19571535635293472, "grad_norm": 6.688362811911548, "learning_rate": 9.297120578127474e-06, "loss": 17.4292, "step": 10707 }, { "epoch": 0.19573363554938125, "grad_norm": 7.8432901584227945, "learning_rate": 9.296969230753316e-06, "loss": 18.1777, "step": 10708 }, { "epoch": 0.19575191474582776, "grad_norm": 6.725996842444234, "learning_rate": 9.296817868318653e-06, "loss": 17.6476, "step": 10709 }, { "epoch": 0.1957701939422743, "grad_norm": 8.875850812368208, "learning_rate": 9.296666490824018e-06, "loss": 18.4301, "step": 10710 }, { "epoch": 0.19578847313872083, "grad_norm": 6.075179535741035, "learning_rate": 9.296515098269938e-06, "loss": 17.2243, "step": 10711 }, { "epoch": 0.19580675233516734, "grad_norm": 6.834905199062873, "learning_rate": 9.296363690656947e-06, "loss": 17.6401, "step": 10712 }, { "epoch": 0.19582503153161387, "grad_norm": 5.901095979828014, "learning_rate": 9.296212267985572e-06, "loss": 17.2658, "step": 10713 }, { "epoch": 0.19584331072806038, "grad_norm": 6.3500574552408455, "learning_rate": 9.296060830256346e-06, "loss": 17.3889, "step": 10714 }, { "epoch": 0.19586158992450692, "grad_norm": 6.91711244092669, "learning_rate": 9.2959093774698e-06, "loss": 17.9106, "step": 10715 }, { "epoch": 0.19587986912095345, "grad_norm": 6.207672715333535, "learning_rate": 9.295757909626465e-06, "loss": 17.1979, "step": 10716 }, { "epoch": 0.19589814831739996, "grad_norm": 6.810790272390377, "learning_rate": 9.29560642672687e-06, "loss": 17.5168, "step": 10717 }, { "epoch": 0.1959164275138465, "grad_norm": 7.976867102812035, "learning_rate": 9.29545492877155e-06, "loss": 18.029, "step": 10718 }, { "epoch": 0.195934706710293, "grad_norm": 7.261506383099081, "learning_rate": 9.295303415761032e-06, "loss": 17.7888, "step": 10719 }, { "epoch": 0.19595298590673954, "grad_norm": 7.008392962604296, "learning_rate": 9.295151887695846e-06, "loss": 17.5551, "step": 10720 }, { "epoch": 0.19597126510318608, "grad_norm": 8.457848839474217, "learning_rate": 9.295000344576528e-06, "loss": 17.6012, "step": 10721 }, { "epoch": 0.19598954429963258, "grad_norm": 6.2744108798805955, "learning_rate": 9.294848786403605e-06, "loss": 17.4231, "step": 10722 }, { "epoch": 0.19600782349607912, "grad_norm": 7.999772964827329, "learning_rate": 9.294697213177611e-06, "loss": 18.4582, "step": 10723 }, { "epoch": 0.19602610269252563, "grad_norm": 5.8756792379465805, "learning_rate": 9.294545624899076e-06, "loss": 17.3341, "step": 10724 }, { "epoch": 0.19604438188897216, "grad_norm": 7.126986733708883, "learning_rate": 9.294394021568529e-06, "loss": 17.692, "step": 10725 }, { "epoch": 0.19606266108541867, "grad_norm": 7.765735288662383, "learning_rate": 9.294242403186507e-06, "loss": 17.8931, "step": 10726 }, { "epoch": 0.1960809402818652, "grad_norm": 7.604591023698333, "learning_rate": 9.294090769753534e-06, "loss": 17.8412, "step": 10727 }, { "epoch": 0.19609921947831174, "grad_norm": 6.772845344253822, "learning_rate": 9.293939121270148e-06, "loss": 17.6838, "step": 10728 }, { "epoch": 0.19611749867475825, "grad_norm": 8.594384432462094, "learning_rate": 9.293787457736878e-06, "loss": 18.3965, "step": 10729 }, { "epoch": 0.1961357778712048, "grad_norm": 6.806616763621282, "learning_rate": 9.293635779154255e-06, "loss": 17.5861, "step": 10730 }, { "epoch": 0.1961540570676513, "grad_norm": 6.43974598625156, "learning_rate": 9.293484085522812e-06, "loss": 17.3388, "step": 10731 }, { "epoch": 0.19617233626409783, "grad_norm": 7.452707094156926, "learning_rate": 9.293332376843078e-06, "loss": 17.93, "step": 10732 }, { "epoch": 0.19619061546054437, "grad_norm": 5.686505492084528, "learning_rate": 9.293180653115587e-06, "loss": 17.1631, "step": 10733 }, { "epoch": 0.19620889465699087, "grad_norm": 5.864712950618436, "learning_rate": 9.293028914340873e-06, "loss": 17.1778, "step": 10734 }, { "epoch": 0.1962271738534374, "grad_norm": 5.693583083329729, "learning_rate": 9.292877160519463e-06, "loss": 17.0784, "step": 10735 }, { "epoch": 0.19624545304988392, "grad_norm": 6.146937285583666, "learning_rate": 9.29272539165189e-06, "loss": 17.2606, "step": 10736 }, { "epoch": 0.19626373224633045, "grad_norm": 5.731994226922119, "learning_rate": 9.292573607738688e-06, "loss": 17.3579, "step": 10737 }, { "epoch": 0.196282011442777, "grad_norm": 6.736091173241974, "learning_rate": 9.292421808780389e-06, "loss": 17.7158, "step": 10738 }, { "epoch": 0.1963002906392235, "grad_norm": 8.56794848346165, "learning_rate": 9.292269994777524e-06, "loss": 18.3193, "step": 10739 }, { "epoch": 0.19631856983567003, "grad_norm": 5.8844783678847685, "learning_rate": 9.292118165730625e-06, "loss": 17.149, "step": 10740 }, { "epoch": 0.19633684903211654, "grad_norm": 5.790033531735474, "learning_rate": 9.291966321640223e-06, "loss": 17.1911, "step": 10741 }, { "epoch": 0.19635512822856308, "grad_norm": 9.037962856954309, "learning_rate": 9.291814462506852e-06, "loss": 18.7479, "step": 10742 }, { "epoch": 0.19637340742500958, "grad_norm": 10.393617610573418, "learning_rate": 9.291662588331046e-06, "loss": 18.0216, "step": 10743 }, { "epoch": 0.19639168662145612, "grad_norm": 7.880354051662654, "learning_rate": 9.291510699113332e-06, "loss": 18.13, "step": 10744 }, { "epoch": 0.19640996581790265, "grad_norm": 6.649269674339112, "learning_rate": 9.291358794854246e-06, "loss": 17.7472, "step": 10745 }, { "epoch": 0.19642824501434916, "grad_norm": 6.24457974277704, "learning_rate": 9.29120687555432e-06, "loss": 17.4464, "step": 10746 }, { "epoch": 0.1964465242107957, "grad_norm": 5.379803587595591, "learning_rate": 9.291054941214087e-06, "loss": 17.0152, "step": 10747 }, { "epoch": 0.1964648034072422, "grad_norm": 7.007398276187704, "learning_rate": 9.290902991834078e-06, "loss": 17.7299, "step": 10748 }, { "epoch": 0.19648308260368874, "grad_norm": 7.699192976521023, "learning_rate": 9.290751027414828e-06, "loss": 17.9241, "step": 10749 }, { "epoch": 0.19650136180013528, "grad_norm": 5.54371662902325, "learning_rate": 9.290599047956865e-06, "loss": 17.1983, "step": 10750 }, { "epoch": 0.19651964099658178, "grad_norm": 6.092367698307502, "learning_rate": 9.290447053460727e-06, "loss": 17.5637, "step": 10751 }, { "epoch": 0.19653792019302832, "grad_norm": 6.810666588395866, "learning_rate": 9.290295043926945e-06, "loss": 17.5853, "step": 10752 }, { "epoch": 0.19655619938947483, "grad_norm": 7.0641765133106285, "learning_rate": 9.290143019356048e-06, "loss": 17.8393, "step": 10753 }, { "epoch": 0.19657447858592136, "grad_norm": 7.402254855193758, "learning_rate": 9.289990979748575e-06, "loss": 17.8503, "step": 10754 }, { "epoch": 0.1965927577823679, "grad_norm": 6.726314999452945, "learning_rate": 9.289838925105054e-06, "loss": 17.6592, "step": 10755 }, { "epoch": 0.1966110369788144, "grad_norm": 5.848487032895969, "learning_rate": 9.28968685542602e-06, "loss": 17.1868, "step": 10756 }, { "epoch": 0.19662931617526094, "grad_norm": 7.762407867966833, "learning_rate": 9.289534770712007e-06, "loss": 17.8981, "step": 10757 }, { "epoch": 0.19664759537170745, "grad_norm": 6.529578507189958, "learning_rate": 9.289382670963548e-06, "loss": 17.4256, "step": 10758 }, { "epoch": 0.196665874568154, "grad_norm": 7.857419411920845, "learning_rate": 9.289230556181172e-06, "loss": 18.0849, "step": 10759 }, { "epoch": 0.1966841537646005, "grad_norm": 7.205082738089403, "learning_rate": 9.289078426365417e-06, "loss": 17.5877, "step": 10760 }, { "epoch": 0.19670243296104703, "grad_norm": 5.807288445967445, "learning_rate": 9.288926281516812e-06, "loss": 17.2288, "step": 10761 }, { "epoch": 0.19672071215749357, "grad_norm": 8.342745064606985, "learning_rate": 9.288774121635895e-06, "loss": 17.6209, "step": 10762 }, { "epoch": 0.19673899135394007, "grad_norm": 7.180800065658491, "learning_rate": 9.288621946723196e-06, "loss": 17.6955, "step": 10763 }, { "epoch": 0.1967572705503866, "grad_norm": 6.565589161064451, "learning_rate": 9.28846975677925e-06, "loss": 17.5578, "step": 10764 }, { "epoch": 0.19677554974683312, "grad_norm": 7.649048599746578, "learning_rate": 9.28831755180459e-06, "loss": 18.1785, "step": 10765 }, { "epoch": 0.19679382894327965, "grad_norm": 6.6356404509470694, "learning_rate": 9.288165331799746e-06, "loss": 17.6297, "step": 10766 }, { "epoch": 0.1968121081397262, "grad_norm": 7.221765529876187, "learning_rate": 9.288013096765258e-06, "loss": 17.8655, "step": 10767 }, { "epoch": 0.1968303873361727, "grad_norm": 6.003297464221561, "learning_rate": 9.287860846701654e-06, "loss": 17.2949, "step": 10768 }, { "epoch": 0.19684866653261923, "grad_norm": 6.526098385174477, "learning_rate": 9.287708581609472e-06, "loss": 17.2405, "step": 10769 }, { "epoch": 0.19686694572906574, "grad_norm": 7.148816755709115, "learning_rate": 9.28755630148924e-06, "loss": 17.6601, "step": 10770 }, { "epoch": 0.19688522492551228, "grad_norm": 6.88800618744152, "learning_rate": 9.287404006341498e-06, "loss": 17.3901, "step": 10771 }, { "epoch": 0.1969035041219588, "grad_norm": 7.202044151925685, "learning_rate": 9.287251696166777e-06, "loss": 17.5591, "step": 10772 }, { "epoch": 0.19692178331840532, "grad_norm": 8.632297946052338, "learning_rate": 9.28709937096561e-06, "loss": 18.4776, "step": 10773 }, { "epoch": 0.19694006251485185, "grad_norm": 9.534936187783808, "learning_rate": 9.286947030738532e-06, "loss": 18.5315, "step": 10774 }, { "epoch": 0.19695834171129836, "grad_norm": 6.679001612778986, "learning_rate": 9.286794675486076e-06, "loss": 17.3928, "step": 10775 }, { "epoch": 0.1969766209077449, "grad_norm": 6.254906701770776, "learning_rate": 9.286642305208776e-06, "loss": 17.3015, "step": 10776 }, { "epoch": 0.1969949001041914, "grad_norm": 6.088482117192647, "learning_rate": 9.28648991990717e-06, "loss": 17.483, "step": 10777 }, { "epoch": 0.19701317930063794, "grad_norm": 7.250488850608622, "learning_rate": 9.286337519581786e-06, "loss": 17.6471, "step": 10778 }, { "epoch": 0.19703145849708448, "grad_norm": 7.044707665896951, "learning_rate": 9.286185104233162e-06, "loss": 17.7596, "step": 10779 }, { "epoch": 0.19704973769353099, "grad_norm": 7.233640126102035, "learning_rate": 9.286032673861831e-06, "loss": 18.0291, "step": 10780 }, { "epoch": 0.19706801688997752, "grad_norm": 7.2387001895141205, "learning_rate": 9.285880228468327e-06, "loss": 17.7576, "step": 10781 }, { "epoch": 0.19708629608642403, "grad_norm": 6.579224732400311, "learning_rate": 9.285727768053185e-06, "loss": 17.5907, "step": 10782 }, { "epoch": 0.19710457528287056, "grad_norm": 7.167222966537241, "learning_rate": 9.285575292616938e-06, "loss": 17.7615, "step": 10783 }, { "epoch": 0.1971228544793171, "grad_norm": 6.263058696373889, "learning_rate": 9.285422802160123e-06, "loss": 17.6282, "step": 10784 }, { "epoch": 0.1971411336757636, "grad_norm": 5.558002420549016, "learning_rate": 9.285270296683273e-06, "loss": 17.2044, "step": 10785 }, { "epoch": 0.19715941287221014, "grad_norm": 6.457501984565157, "learning_rate": 9.285117776186922e-06, "loss": 17.3073, "step": 10786 }, { "epoch": 0.19717769206865665, "grad_norm": 6.8279219970562215, "learning_rate": 9.284965240671604e-06, "loss": 17.796, "step": 10787 }, { "epoch": 0.1971959712651032, "grad_norm": 6.265323462252164, "learning_rate": 9.284812690137857e-06, "loss": 17.3556, "step": 10788 }, { "epoch": 0.19721425046154972, "grad_norm": 7.24634507097224, "learning_rate": 9.28466012458621e-06, "loss": 17.9901, "step": 10789 }, { "epoch": 0.19723252965799623, "grad_norm": 6.7671818090699976, "learning_rate": 9.284507544017203e-06, "loss": 17.3748, "step": 10790 }, { "epoch": 0.19725080885444277, "grad_norm": 7.11350885505168, "learning_rate": 9.28435494843137e-06, "loss": 17.4191, "step": 10791 }, { "epoch": 0.19726908805088927, "grad_norm": 6.429692882394918, "learning_rate": 9.284202337829244e-06, "loss": 17.6613, "step": 10792 }, { "epoch": 0.1972873672473358, "grad_norm": 6.543431871173417, "learning_rate": 9.28404971221136e-06, "loss": 17.5675, "step": 10793 }, { "epoch": 0.19730564644378232, "grad_norm": 7.3774655749339315, "learning_rate": 9.283897071578254e-06, "loss": 17.8745, "step": 10794 }, { "epoch": 0.19732392564022885, "grad_norm": 7.393943625505083, "learning_rate": 9.28374441593046e-06, "loss": 17.5336, "step": 10795 }, { "epoch": 0.1973422048366754, "grad_norm": 6.2664293535677995, "learning_rate": 9.283591745268512e-06, "loss": 17.5629, "step": 10796 }, { "epoch": 0.1973604840331219, "grad_norm": 6.8093411548541365, "learning_rate": 9.283439059592949e-06, "loss": 17.6704, "step": 10797 }, { "epoch": 0.19737876322956843, "grad_norm": 6.954806390677689, "learning_rate": 9.283286358904304e-06, "loss": 17.5025, "step": 10798 }, { "epoch": 0.19739704242601494, "grad_norm": 8.512641998122215, "learning_rate": 9.283133643203111e-06, "loss": 18.2705, "step": 10799 }, { "epoch": 0.19741532162246148, "grad_norm": 6.994446648295691, "learning_rate": 9.282980912489906e-06, "loss": 17.8209, "step": 10800 }, { "epoch": 0.197433600818908, "grad_norm": 6.001762851933642, "learning_rate": 9.282828166765226e-06, "loss": 17.3401, "step": 10801 }, { "epoch": 0.19745188001535452, "grad_norm": 6.380943936450123, "learning_rate": 9.282675406029604e-06, "loss": 17.1109, "step": 10802 }, { "epoch": 0.19747015921180106, "grad_norm": 6.851594593750535, "learning_rate": 9.282522630283575e-06, "loss": 17.4685, "step": 10803 }, { "epoch": 0.19748843840824756, "grad_norm": 6.1245737107628875, "learning_rate": 9.282369839527678e-06, "loss": 17.3703, "step": 10804 }, { "epoch": 0.1975067176046941, "grad_norm": 7.088313913282194, "learning_rate": 9.282217033762444e-06, "loss": 17.644, "step": 10805 }, { "epoch": 0.19752499680114063, "grad_norm": 6.70642803619076, "learning_rate": 9.282064212988414e-06, "loss": 17.7818, "step": 10806 }, { "epoch": 0.19754327599758714, "grad_norm": 5.523099811272099, "learning_rate": 9.281911377206118e-06, "loss": 17.089, "step": 10807 }, { "epoch": 0.19756155519403368, "grad_norm": 7.182200762733401, "learning_rate": 9.281758526416094e-06, "loss": 17.5055, "step": 10808 }, { "epoch": 0.19757983439048019, "grad_norm": 5.83526328434585, "learning_rate": 9.28160566061888e-06, "loss": 16.8324, "step": 10809 }, { "epoch": 0.19759811358692672, "grad_norm": 7.869972066413671, "learning_rate": 9.281452779815007e-06, "loss": 18.2756, "step": 10810 }, { "epoch": 0.19761639278337323, "grad_norm": 5.962958806178682, "learning_rate": 9.281299884005017e-06, "loss": 17.4023, "step": 10811 }, { "epoch": 0.19763467197981976, "grad_norm": 6.822327165227126, "learning_rate": 9.28114697318944e-06, "loss": 17.5601, "step": 10812 }, { "epoch": 0.1976529511762663, "grad_norm": 7.4630866633682365, "learning_rate": 9.280994047368815e-06, "loss": 17.8554, "step": 10813 }, { "epoch": 0.1976712303727128, "grad_norm": 7.617800974958288, "learning_rate": 9.280841106543677e-06, "loss": 17.9616, "step": 10814 }, { "epoch": 0.19768950956915934, "grad_norm": 6.9302645605626525, "learning_rate": 9.280688150714562e-06, "loss": 17.2055, "step": 10815 }, { "epoch": 0.19770778876560585, "grad_norm": 6.354683628285268, "learning_rate": 9.280535179882008e-06, "loss": 17.3398, "step": 10816 }, { "epoch": 0.1977260679620524, "grad_norm": 5.553582914311273, "learning_rate": 9.280382194046548e-06, "loss": 17.2443, "step": 10817 }, { "epoch": 0.19774434715849892, "grad_norm": 6.313404246286071, "learning_rate": 9.28022919320872e-06, "loss": 17.6153, "step": 10818 }, { "epoch": 0.19776262635494543, "grad_norm": 7.183189177438069, "learning_rate": 9.280076177369062e-06, "loss": 17.8033, "step": 10819 }, { "epoch": 0.19778090555139197, "grad_norm": 6.426110523637121, "learning_rate": 9.279923146528106e-06, "loss": 17.4548, "step": 10820 }, { "epoch": 0.19779918474783847, "grad_norm": 6.8997417900141835, "learning_rate": 9.279770100686391e-06, "loss": 17.6989, "step": 10821 }, { "epoch": 0.197817463944285, "grad_norm": 7.029461683598869, "learning_rate": 9.279617039844455e-06, "loss": 17.317, "step": 10822 }, { "epoch": 0.19783574314073155, "grad_norm": 5.334324070800717, "learning_rate": 9.279463964002832e-06, "loss": 17.0673, "step": 10823 }, { "epoch": 0.19785402233717805, "grad_norm": 7.220410031420295, "learning_rate": 9.279310873162059e-06, "loss": 17.7454, "step": 10824 }, { "epoch": 0.1978723015336246, "grad_norm": 6.445500181237727, "learning_rate": 9.279157767322673e-06, "loss": 17.4973, "step": 10825 }, { "epoch": 0.1978905807300711, "grad_norm": 7.370052113078593, "learning_rate": 9.27900464648521e-06, "loss": 18.4546, "step": 10826 }, { "epoch": 0.19790885992651763, "grad_norm": 7.436753759920104, "learning_rate": 9.278851510650207e-06, "loss": 17.5844, "step": 10827 }, { "epoch": 0.19792713912296414, "grad_norm": 6.357018547397667, "learning_rate": 9.2786983598182e-06, "loss": 17.5205, "step": 10828 }, { "epoch": 0.19794541831941068, "grad_norm": 6.274501885764996, "learning_rate": 9.278545193989728e-06, "loss": 17.3663, "step": 10829 }, { "epoch": 0.1979636975158572, "grad_norm": 6.895095266400871, "learning_rate": 9.278392013165325e-06, "loss": 17.6496, "step": 10830 }, { "epoch": 0.19798197671230372, "grad_norm": 5.365151583121495, "learning_rate": 9.27823881734553e-06, "loss": 17.0242, "step": 10831 }, { "epoch": 0.19800025590875026, "grad_norm": 7.001238935540446, "learning_rate": 9.278085606530879e-06, "loss": 17.775, "step": 10832 }, { "epoch": 0.19801853510519676, "grad_norm": 7.1434068090519265, "learning_rate": 9.277932380721908e-06, "loss": 17.6297, "step": 10833 }, { "epoch": 0.1980368143016433, "grad_norm": 7.27685512217264, "learning_rate": 9.277779139919157e-06, "loss": 17.9849, "step": 10834 }, { "epoch": 0.19805509349808983, "grad_norm": 6.499332836240062, "learning_rate": 9.277625884123162e-06, "loss": 17.5162, "step": 10835 }, { "epoch": 0.19807337269453634, "grad_norm": 7.142852026254099, "learning_rate": 9.277472613334457e-06, "loss": 17.7445, "step": 10836 }, { "epoch": 0.19809165189098288, "grad_norm": 8.129783649562425, "learning_rate": 9.277319327553584e-06, "loss": 18.3967, "step": 10837 }, { "epoch": 0.1981099310874294, "grad_norm": 6.8305374317355705, "learning_rate": 9.277166026781076e-06, "loss": 17.5188, "step": 10838 }, { "epoch": 0.19812821028387592, "grad_norm": 6.344683701928417, "learning_rate": 9.277012711017473e-06, "loss": 17.4358, "step": 10839 }, { "epoch": 0.19814648948032246, "grad_norm": 7.054374100658382, "learning_rate": 9.276859380263313e-06, "loss": 17.6545, "step": 10840 }, { "epoch": 0.19816476867676897, "grad_norm": 6.621685344435275, "learning_rate": 9.276706034519129e-06, "loss": 17.7552, "step": 10841 }, { "epoch": 0.1981830478732155, "grad_norm": 6.43218685786527, "learning_rate": 9.276552673785464e-06, "loss": 17.2751, "step": 10842 }, { "epoch": 0.198201327069662, "grad_norm": 5.896436973847265, "learning_rate": 9.276399298062853e-06, "loss": 16.996, "step": 10843 }, { "epoch": 0.19821960626610854, "grad_norm": 12.692140193254197, "learning_rate": 9.276245907351831e-06, "loss": 18.7467, "step": 10844 }, { "epoch": 0.19823788546255505, "grad_norm": 8.392272527927158, "learning_rate": 9.27609250165294e-06, "loss": 18.1811, "step": 10845 }, { "epoch": 0.1982561646590016, "grad_norm": 6.3733792969341, "learning_rate": 9.275939080966716e-06, "loss": 17.4744, "step": 10846 }, { "epoch": 0.19827444385544812, "grad_norm": 6.670479841457684, "learning_rate": 9.275785645293697e-06, "loss": 17.6064, "step": 10847 }, { "epoch": 0.19829272305189463, "grad_norm": 7.498507478063741, "learning_rate": 9.27563219463442e-06, "loss": 17.9733, "step": 10848 }, { "epoch": 0.19831100224834117, "grad_norm": 6.610138137691433, "learning_rate": 9.275478728989422e-06, "loss": 17.5745, "step": 10849 }, { "epoch": 0.19832928144478768, "grad_norm": 10.92354219974126, "learning_rate": 9.275325248359245e-06, "loss": 18.6201, "step": 10850 }, { "epoch": 0.1983475606412342, "grad_norm": 7.570411642303618, "learning_rate": 9.27517175274442e-06, "loss": 18.1232, "step": 10851 }, { "epoch": 0.19836583983768075, "grad_norm": 7.008058196705809, "learning_rate": 9.27501824214549e-06, "loss": 17.7092, "step": 10852 }, { "epoch": 0.19838411903412725, "grad_norm": 6.787764942709426, "learning_rate": 9.274864716562993e-06, "loss": 17.4613, "step": 10853 }, { "epoch": 0.1984023982305738, "grad_norm": 7.169628550706973, "learning_rate": 9.274711175997466e-06, "loss": 17.7941, "step": 10854 }, { "epoch": 0.1984206774270203, "grad_norm": 6.785940497665481, "learning_rate": 9.274557620449448e-06, "loss": 17.6994, "step": 10855 }, { "epoch": 0.19843895662346683, "grad_norm": 5.900930090399167, "learning_rate": 9.274404049919475e-06, "loss": 17.4232, "step": 10856 }, { "epoch": 0.19845723581991337, "grad_norm": 6.298469309316371, "learning_rate": 9.274250464408087e-06, "loss": 17.6218, "step": 10857 }, { "epoch": 0.19847551501635988, "grad_norm": 7.4420262749846, "learning_rate": 9.274096863915824e-06, "loss": 18.0382, "step": 10858 }, { "epoch": 0.1984937942128064, "grad_norm": 8.457845366616512, "learning_rate": 9.27394324844322e-06, "loss": 18.1733, "step": 10859 }, { "epoch": 0.19851207340925292, "grad_norm": 6.5472351756218075, "learning_rate": 9.273789617990818e-06, "loss": 17.4029, "step": 10860 }, { "epoch": 0.19853035260569946, "grad_norm": 6.117378900362806, "learning_rate": 9.273635972559152e-06, "loss": 17.3464, "step": 10861 }, { "epoch": 0.19854863180214596, "grad_norm": 6.69009016103747, "learning_rate": 9.273482312148766e-06, "loss": 17.4837, "step": 10862 }, { "epoch": 0.1985669109985925, "grad_norm": 6.675722361896055, "learning_rate": 9.27332863676019e-06, "loss": 17.777, "step": 10863 }, { "epoch": 0.19858519019503904, "grad_norm": 6.670294770073691, "learning_rate": 9.273174946393973e-06, "loss": 17.6327, "step": 10864 }, { "epoch": 0.19860346939148554, "grad_norm": 5.8263897901825334, "learning_rate": 9.273021241050645e-06, "loss": 17.1493, "step": 10865 }, { "epoch": 0.19862174858793208, "grad_norm": 6.1740388700652105, "learning_rate": 9.27286752073075e-06, "loss": 17.3768, "step": 10866 }, { "epoch": 0.1986400277843786, "grad_norm": 5.703210716908118, "learning_rate": 9.272713785434827e-06, "loss": 17.1263, "step": 10867 }, { "epoch": 0.19865830698082512, "grad_norm": 7.443573776636084, "learning_rate": 9.27256003516341e-06, "loss": 18.0349, "step": 10868 }, { "epoch": 0.19867658617727166, "grad_norm": 5.991726657722256, "learning_rate": 9.272406269917041e-06, "loss": 17.1115, "step": 10869 }, { "epoch": 0.19869486537371817, "grad_norm": 7.437226033210058, "learning_rate": 9.27225248969626e-06, "loss": 18.4094, "step": 10870 }, { "epoch": 0.1987131445701647, "grad_norm": 7.523218516674712, "learning_rate": 9.272098694501604e-06, "loss": 17.7628, "step": 10871 }, { "epoch": 0.1987314237666112, "grad_norm": 6.54169780413771, "learning_rate": 9.271944884333614e-06, "loss": 17.5334, "step": 10872 }, { "epoch": 0.19874970296305774, "grad_norm": 7.856703845300767, "learning_rate": 9.271791059192828e-06, "loss": 18.1138, "step": 10873 }, { "epoch": 0.19876798215950428, "grad_norm": 7.311420298885436, "learning_rate": 9.271637219079784e-06, "loss": 17.978, "step": 10874 }, { "epoch": 0.1987862613559508, "grad_norm": 7.086903096477535, "learning_rate": 9.27148336399502e-06, "loss": 17.587, "step": 10875 }, { "epoch": 0.19880454055239732, "grad_norm": 6.838484668499034, "learning_rate": 9.27132949393908e-06, "loss": 17.5756, "step": 10876 }, { "epoch": 0.19882281974884383, "grad_norm": 6.848692496481984, "learning_rate": 9.271175608912501e-06, "loss": 17.5735, "step": 10877 }, { "epoch": 0.19884109894529037, "grad_norm": 8.390905815444885, "learning_rate": 9.271021708915822e-06, "loss": 18.1065, "step": 10878 }, { "epoch": 0.19885937814173688, "grad_norm": 6.230535545187577, "learning_rate": 9.270867793949583e-06, "loss": 17.6444, "step": 10879 }, { "epoch": 0.1988776573381834, "grad_norm": 6.248780898488394, "learning_rate": 9.270713864014321e-06, "loss": 17.6363, "step": 10880 }, { "epoch": 0.19889593653462995, "grad_norm": 6.088296431862701, "learning_rate": 9.270559919110579e-06, "loss": 17.0838, "step": 10881 }, { "epoch": 0.19891421573107645, "grad_norm": 7.964064373572228, "learning_rate": 9.270405959238896e-06, "loss": 17.9265, "step": 10882 }, { "epoch": 0.198932494927523, "grad_norm": 6.243094934678688, "learning_rate": 9.270251984399807e-06, "loss": 17.4484, "step": 10883 }, { "epoch": 0.1989507741239695, "grad_norm": 7.907115831522865, "learning_rate": 9.270097994593859e-06, "loss": 18.049, "step": 10884 }, { "epoch": 0.19896905332041603, "grad_norm": 6.272438758235311, "learning_rate": 9.269943989821587e-06, "loss": 17.3441, "step": 10885 }, { "epoch": 0.19898733251686257, "grad_norm": 6.211685599992287, "learning_rate": 9.269789970083531e-06, "loss": 17.1009, "step": 10886 }, { "epoch": 0.19900561171330908, "grad_norm": 5.111757953037374, "learning_rate": 9.269635935380233e-06, "loss": 16.9119, "step": 10887 }, { "epoch": 0.1990238909097556, "grad_norm": 7.02040051715025, "learning_rate": 9.26948188571223e-06, "loss": 17.9046, "step": 10888 }, { "epoch": 0.19904217010620212, "grad_norm": 5.70667606270827, "learning_rate": 9.269327821080064e-06, "loss": 17.2795, "step": 10889 }, { "epoch": 0.19906044930264866, "grad_norm": 6.159647776524561, "learning_rate": 9.269173741484277e-06, "loss": 17.2703, "step": 10890 }, { "epoch": 0.1990787284990952, "grad_norm": 7.719437738199956, "learning_rate": 9.269019646925404e-06, "loss": 17.9644, "step": 10891 }, { "epoch": 0.1990970076955417, "grad_norm": 5.748278172028444, "learning_rate": 9.268865537403987e-06, "loss": 17.1837, "step": 10892 }, { "epoch": 0.19911528689198824, "grad_norm": 6.12576407150042, "learning_rate": 9.268711412920567e-06, "loss": 17.3183, "step": 10893 }, { "epoch": 0.19913356608843474, "grad_norm": 7.2380097679109845, "learning_rate": 9.268557273475685e-06, "loss": 17.6635, "step": 10894 }, { "epoch": 0.19915184528488128, "grad_norm": 7.244045805527676, "learning_rate": 9.26840311906988e-06, "loss": 17.9317, "step": 10895 }, { "epoch": 0.1991701244813278, "grad_norm": 7.473918145896732, "learning_rate": 9.268248949703693e-06, "loss": 17.9833, "step": 10896 }, { "epoch": 0.19918840367777432, "grad_norm": 9.104871874023008, "learning_rate": 9.268094765377662e-06, "loss": 18.0553, "step": 10897 }, { "epoch": 0.19920668287422086, "grad_norm": 6.944195293043811, "learning_rate": 9.26794056609233e-06, "loss": 17.1542, "step": 10898 }, { "epoch": 0.19922496207066737, "grad_norm": 5.977489454455816, "learning_rate": 9.267786351848238e-06, "loss": 17.3206, "step": 10899 }, { "epoch": 0.1992432412671139, "grad_norm": 7.571865722419859, "learning_rate": 9.267632122645924e-06, "loss": 18.0964, "step": 10900 }, { "epoch": 0.1992615204635604, "grad_norm": 7.121409349012491, "learning_rate": 9.26747787848593e-06, "loss": 17.2025, "step": 10901 }, { "epoch": 0.19927979966000695, "grad_norm": 7.834341990595362, "learning_rate": 9.267323619368795e-06, "loss": 17.7799, "step": 10902 }, { "epoch": 0.19929807885645348, "grad_norm": 6.495101935179148, "learning_rate": 9.267169345295063e-06, "loss": 17.5505, "step": 10903 }, { "epoch": 0.1993163580529, "grad_norm": 8.68647864663089, "learning_rate": 9.267015056265272e-06, "loss": 18.4633, "step": 10904 }, { "epoch": 0.19933463724934652, "grad_norm": 5.533154408312759, "learning_rate": 9.266860752279964e-06, "loss": 17.2265, "step": 10905 }, { "epoch": 0.19935291644579303, "grad_norm": 9.045653357664435, "learning_rate": 9.266706433339678e-06, "loss": 18.4361, "step": 10906 }, { "epoch": 0.19937119564223957, "grad_norm": 6.681364541307185, "learning_rate": 9.266552099444957e-06, "loss": 17.7212, "step": 10907 }, { "epoch": 0.1993894748386861, "grad_norm": 8.276329180423547, "learning_rate": 9.26639775059634e-06, "loss": 18.4311, "step": 10908 }, { "epoch": 0.1994077540351326, "grad_norm": 7.544906962956952, "learning_rate": 9.266243386794372e-06, "loss": 17.7837, "step": 10909 }, { "epoch": 0.19942603323157915, "grad_norm": 6.3703401990947155, "learning_rate": 9.266089008039589e-06, "loss": 17.3522, "step": 10910 }, { "epoch": 0.19944431242802566, "grad_norm": 7.063462640206068, "learning_rate": 9.265934614332534e-06, "loss": 17.6295, "step": 10911 }, { "epoch": 0.1994625916244722, "grad_norm": 7.051708868623823, "learning_rate": 9.265780205673749e-06, "loss": 17.9861, "step": 10912 }, { "epoch": 0.1994808708209187, "grad_norm": 6.990076011496199, "learning_rate": 9.265625782063774e-06, "loss": 17.8551, "step": 10913 }, { "epoch": 0.19949915001736523, "grad_norm": 6.814238402632026, "learning_rate": 9.265471343503152e-06, "loss": 17.6684, "step": 10914 }, { "epoch": 0.19951742921381177, "grad_norm": 6.723381483146955, "learning_rate": 9.265316889992422e-06, "loss": 18.0331, "step": 10915 }, { "epoch": 0.19953570841025828, "grad_norm": 6.216760276952046, "learning_rate": 9.265162421532126e-06, "loss": 17.3248, "step": 10916 }, { "epoch": 0.1995539876067048, "grad_norm": 7.183698849365839, "learning_rate": 9.265007938122807e-06, "loss": 17.9315, "step": 10917 }, { "epoch": 0.19957226680315132, "grad_norm": 7.490734028144163, "learning_rate": 9.264853439765005e-06, "loss": 18.2393, "step": 10918 }, { "epoch": 0.19959054599959786, "grad_norm": 8.24745633213347, "learning_rate": 9.264698926459261e-06, "loss": 18.0886, "step": 10919 }, { "epoch": 0.1996088251960444, "grad_norm": 6.113159803142408, "learning_rate": 9.264544398206119e-06, "loss": 17.486, "step": 10920 }, { "epoch": 0.1996271043924909, "grad_norm": 8.206470583068661, "learning_rate": 9.264389855006118e-06, "loss": 18.1456, "step": 10921 }, { "epoch": 0.19964538358893744, "grad_norm": 7.452119559441181, "learning_rate": 9.2642352968598e-06, "loss": 17.9846, "step": 10922 }, { "epoch": 0.19966366278538394, "grad_norm": 6.938408694079041, "learning_rate": 9.264080723767707e-06, "loss": 17.8101, "step": 10923 }, { "epoch": 0.19968194198183048, "grad_norm": 6.019695578849084, "learning_rate": 9.263926135730383e-06, "loss": 17.3154, "step": 10924 }, { "epoch": 0.19970022117827702, "grad_norm": 6.782514032171597, "learning_rate": 9.263771532748367e-06, "loss": 17.5067, "step": 10925 }, { "epoch": 0.19971850037472352, "grad_norm": 6.118197987069956, "learning_rate": 9.263616914822201e-06, "loss": 17.0961, "step": 10926 }, { "epoch": 0.19973677957117006, "grad_norm": 7.035595199165753, "learning_rate": 9.26346228195243e-06, "loss": 17.7247, "step": 10927 }, { "epoch": 0.19975505876761657, "grad_norm": 6.921837355340135, "learning_rate": 9.26330763413959e-06, "loss": 17.6363, "step": 10928 }, { "epoch": 0.1997733379640631, "grad_norm": 7.238623576682792, "learning_rate": 9.26315297138423e-06, "loss": 17.6164, "step": 10929 }, { "epoch": 0.1997916171605096, "grad_norm": 9.928512166924047, "learning_rate": 9.262998293686888e-06, "loss": 17.7197, "step": 10930 }, { "epoch": 0.19980989635695615, "grad_norm": 5.792244353120198, "learning_rate": 9.262843601048104e-06, "loss": 17.2798, "step": 10931 }, { "epoch": 0.19982817555340268, "grad_norm": 5.401133240098436, "learning_rate": 9.262688893468426e-06, "loss": 17.0325, "step": 10932 }, { "epoch": 0.1998464547498492, "grad_norm": 6.911698034065736, "learning_rate": 9.262534170948392e-06, "loss": 17.7217, "step": 10933 }, { "epoch": 0.19986473394629573, "grad_norm": 7.515353464874539, "learning_rate": 9.262379433488547e-06, "loss": 17.8443, "step": 10934 }, { "epoch": 0.19988301314274223, "grad_norm": 6.765047061950747, "learning_rate": 9.262224681089432e-06, "loss": 17.6567, "step": 10935 }, { "epoch": 0.19990129233918877, "grad_norm": 6.717639189321588, "learning_rate": 9.262069913751589e-06, "loss": 17.6313, "step": 10936 }, { "epoch": 0.1999195715356353, "grad_norm": 7.848672015083547, "learning_rate": 9.261915131475561e-06, "loss": 17.9072, "step": 10937 }, { "epoch": 0.1999378507320818, "grad_norm": 7.656189118648847, "learning_rate": 9.261760334261888e-06, "loss": 18.2532, "step": 10938 }, { "epoch": 0.19995612992852835, "grad_norm": 6.906178224734655, "learning_rate": 9.261605522111117e-06, "loss": 17.8904, "step": 10939 }, { "epoch": 0.19997440912497486, "grad_norm": 5.500725992902303, "learning_rate": 9.261450695023789e-06, "loss": 17.2585, "step": 10940 }, { "epoch": 0.1999926883214214, "grad_norm": 7.063104493861325, "learning_rate": 9.261295853000445e-06, "loss": 17.7415, "step": 10941 }, { "epoch": 0.20001096751786793, "grad_norm": 7.882567722388475, "learning_rate": 9.26114099604163e-06, "loss": 18.3038, "step": 10942 }, { "epoch": 0.20002924671431443, "grad_norm": 6.572875620156166, "learning_rate": 9.260986124147884e-06, "loss": 17.6132, "step": 10943 }, { "epoch": 0.20004752591076097, "grad_norm": 6.43107508416896, "learning_rate": 9.260831237319752e-06, "loss": 17.4252, "step": 10944 }, { "epoch": 0.20006580510720748, "grad_norm": 6.133192475896936, "learning_rate": 9.260676335557774e-06, "loss": 17.3416, "step": 10945 }, { "epoch": 0.20008408430365401, "grad_norm": 5.888722241992663, "learning_rate": 9.260521418862498e-06, "loss": 17.3938, "step": 10946 }, { "epoch": 0.20010236350010052, "grad_norm": 8.157620817807263, "learning_rate": 9.260366487234465e-06, "loss": 18.2197, "step": 10947 }, { "epoch": 0.20012064269654706, "grad_norm": 7.065616268419404, "learning_rate": 9.260211540674215e-06, "loss": 17.7513, "step": 10948 }, { "epoch": 0.2001389218929936, "grad_norm": 6.562597400109817, "learning_rate": 9.260056579182292e-06, "loss": 17.3057, "step": 10949 }, { "epoch": 0.2001572010894401, "grad_norm": 6.39383651983612, "learning_rate": 9.259901602759244e-06, "loss": 17.6876, "step": 10950 }, { "epoch": 0.20017548028588664, "grad_norm": 6.396783115609105, "learning_rate": 9.25974661140561e-06, "loss": 17.347, "step": 10951 }, { "epoch": 0.20019375948233314, "grad_norm": 5.385791227480741, "learning_rate": 9.259591605121932e-06, "loss": 17.1368, "step": 10952 }, { "epoch": 0.20021203867877968, "grad_norm": 6.698850836391977, "learning_rate": 9.259436583908754e-06, "loss": 17.3805, "step": 10953 }, { "epoch": 0.20023031787522622, "grad_norm": 6.7136310001901744, "learning_rate": 9.259281547766623e-06, "loss": 17.939, "step": 10954 }, { "epoch": 0.20024859707167272, "grad_norm": 6.801087623392054, "learning_rate": 9.259126496696079e-06, "loss": 17.7017, "step": 10955 }, { "epoch": 0.20026687626811926, "grad_norm": 7.7693655840737526, "learning_rate": 9.258971430697666e-06, "loss": 17.7556, "step": 10956 }, { "epoch": 0.20028515546456577, "grad_norm": 6.9253554573314515, "learning_rate": 9.258816349771927e-06, "loss": 17.4131, "step": 10957 }, { "epoch": 0.2003034346610123, "grad_norm": 6.094023118280109, "learning_rate": 9.258661253919408e-06, "loss": 17.5846, "step": 10958 }, { "epoch": 0.20032171385745884, "grad_norm": 6.475855412892942, "learning_rate": 9.25850614314065e-06, "loss": 17.5506, "step": 10959 }, { "epoch": 0.20033999305390535, "grad_norm": 6.121848840087118, "learning_rate": 9.258351017436196e-06, "loss": 17.2962, "step": 10960 }, { "epoch": 0.20035827225035188, "grad_norm": 6.760167166388431, "learning_rate": 9.258195876806593e-06, "loss": 17.7835, "step": 10961 }, { "epoch": 0.2003765514467984, "grad_norm": 5.917783471813344, "learning_rate": 9.258040721252383e-06, "loss": 17.2104, "step": 10962 }, { "epoch": 0.20039483064324493, "grad_norm": 8.432581227712953, "learning_rate": 9.257885550774108e-06, "loss": 18.203, "step": 10963 }, { "epoch": 0.20041310983969143, "grad_norm": 6.43125791912894, "learning_rate": 9.257730365372315e-06, "loss": 17.3735, "step": 10964 }, { "epoch": 0.20043138903613797, "grad_norm": 6.348945505247846, "learning_rate": 9.257575165047547e-06, "loss": 17.5365, "step": 10965 }, { "epoch": 0.2004496682325845, "grad_norm": 5.887554857610085, "learning_rate": 9.257419949800347e-06, "loss": 17.173, "step": 10966 }, { "epoch": 0.200467947429031, "grad_norm": 7.285541336636381, "learning_rate": 9.257264719631259e-06, "loss": 17.8792, "step": 10967 }, { "epoch": 0.20048622662547755, "grad_norm": 7.007494663319904, "learning_rate": 9.257109474540828e-06, "loss": 17.6362, "step": 10968 }, { "epoch": 0.20050450582192406, "grad_norm": 6.328330410958518, "learning_rate": 9.256954214529599e-06, "loss": 17.5554, "step": 10969 }, { "epoch": 0.2005227850183706, "grad_norm": 6.510092270616266, "learning_rate": 9.256798939598113e-06, "loss": 17.5696, "step": 10970 }, { "epoch": 0.20054106421481713, "grad_norm": 5.909293919661958, "learning_rate": 9.256643649746917e-06, "loss": 17.2425, "step": 10971 }, { "epoch": 0.20055934341126364, "grad_norm": 6.915669059292693, "learning_rate": 9.256488344976552e-06, "loss": 17.5665, "step": 10972 }, { "epoch": 0.20057762260771017, "grad_norm": 7.034467967793795, "learning_rate": 9.256333025287569e-06, "loss": 17.6718, "step": 10973 }, { "epoch": 0.20059590180415668, "grad_norm": 6.076594057259614, "learning_rate": 9.256177690680506e-06, "loss": 17.0781, "step": 10974 }, { "epoch": 0.20061418100060321, "grad_norm": 5.794475106139136, "learning_rate": 9.256022341155909e-06, "loss": 17.5427, "step": 10975 }, { "epoch": 0.20063246019704975, "grad_norm": 5.456558611233308, "learning_rate": 9.255866976714323e-06, "loss": 17.1452, "step": 10976 }, { "epoch": 0.20065073939349626, "grad_norm": 7.4855302093332075, "learning_rate": 9.255711597356293e-06, "loss": 17.6005, "step": 10977 }, { "epoch": 0.2006690185899428, "grad_norm": 8.035824468405956, "learning_rate": 9.255556203082363e-06, "loss": 17.6279, "step": 10978 }, { "epoch": 0.2006872977863893, "grad_norm": 6.521175712373388, "learning_rate": 9.25540079389308e-06, "loss": 17.6865, "step": 10979 }, { "epoch": 0.20070557698283584, "grad_norm": 6.731090043339445, "learning_rate": 9.255245369788983e-06, "loss": 17.648, "step": 10980 }, { "epoch": 0.20072385617928235, "grad_norm": 8.79038401908676, "learning_rate": 9.255089930770621e-06, "loss": 17.3754, "step": 10981 }, { "epoch": 0.20074213537572888, "grad_norm": 6.29924407503212, "learning_rate": 9.254934476838539e-06, "loss": 17.3005, "step": 10982 }, { "epoch": 0.20076041457217542, "grad_norm": 5.544228481101844, "learning_rate": 9.254779007993281e-06, "loss": 17.005, "step": 10983 }, { "epoch": 0.20077869376862192, "grad_norm": 8.489774790847997, "learning_rate": 9.254623524235392e-06, "loss": 18.2677, "step": 10984 }, { "epoch": 0.20079697296506846, "grad_norm": 7.079374417733842, "learning_rate": 9.254468025565414e-06, "loss": 17.6445, "step": 10985 }, { "epoch": 0.20081525216151497, "grad_norm": 7.550008096182841, "learning_rate": 9.254312511983898e-06, "loss": 17.9238, "step": 10986 }, { "epoch": 0.2008335313579615, "grad_norm": 6.945211437259713, "learning_rate": 9.254156983491385e-06, "loss": 17.7057, "step": 10987 }, { "epoch": 0.20085181055440804, "grad_norm": 7.7531398968572685, "learning_rate": 9.25400144008842e-06, "loss": 17.6414, "step": 10988 }, { "epoch": 0.20087008975085455, "grad_norm": 6.249767722058566, "learning_rate": 9.25384588177555e-06, "loss": 17.4012, "step": 10989 }, { "epoch": 0.20088836894730108, "grad_norm": 7.076621240543565, "learning_rate": 9.253690308553318e-06, "loss": 17.9639, "step": 10990 }, { "epoch": 0.2009066481437476, "grad_norm": 6.622467245554146, "learning_rate": 9.253534720422272e-06, "loss": 17.5139, "step": 10991 }, { "epoch": 0.20092492734019413, "grad_norm": 10.602283253980888, "learning_rate": 9.253379117382957e-06, "loss": 18.7819, "step": 10992 }, { "epoch": 0.20094320653664066, "grad_norm": 7.706285912694962, "learning_rate": 9.253223499435916e-06, "loss": 17.9942, "step": 10993 }, { "epoch": 0.20096148573308717, "grad_norm": 7.467509917385291, "learning_rate": 9.253067866581696e-06, "loss": 17.9538, "step": 10994 }, { "epoch": 0.2009797649295337, "grad_norm": 7.20151835260283, "learning_rate": 9.25291221882084e-06, "loss": 17.9342, "step": 10995 }, { "epoch": 0.2009980441259802, "grad_norm": 6.681603235474797, "learning_rate": 9.252756556153898e-06, "loss": 17.5466, "step": 10996 }, { "epoch": 0.20101632332242675, "grad_norm": 5.785585526516853, "learning_rate": 9.252600878581413e-06, "loss": 17.3131, "step": 10997 }, { "epoch": 0.20103460251887326, "grad_norm": 6.260524820848509, "learning_rate": 9.252445186103931e-06, "loss": 17.3714, "step": 10998 }, { "epoch": 0.2010528817153198, "grad_norm": 5.946378521976767, "learning_rate": 9.252289478721996e-06, "loss": 17.1623, "step": 10999 }, { "epoch": 0.20107116091176633, "grad_norm": 6.690654018343409, "learning_rate": 9.252133756436158e-06, "loss": 17.7501, "step": 11000 }, { "epoch": 0.20108944010821284, "grad_norm": 6.182391269665302, "learning_rate": 9.251978019246957e-06, "loss": 17.2598, "step": 11001 }, { "epoch": 0.20110771930465937, "grad_norm": 6.812728097434233, "learning_rate": 9.251822267154946e-06, "loss": 17.5469, "step": 11002 }, { "epoch": 0.20112599850110588, "grad_norm": 7.386204815518348, "learning_rate": 9.251666500160663e-06, "loss": 17.613, "step": 11003 }, { "epoch": 0.20114427769755241, "grad_norm": 6.417796772408095, "learning_rate": 9.251510718264661e-06, "loss": 17.3802, "step": 11004 }, { "epoch": 0.20116255689399895, "grad_norm": 7.005406068602088, "learning_rate": 9.251354921467482e-06, "loss": 17.8147, "step": 11005 }, { "epoch": 0.20118083609044546, "grad_norm": 7.020304115906132, "learning_rate": 9.25119910976967e-06, "loss": 17.8303, "step": 11006 }, { "epoch": 0.201199115286892, "grad_norm": 7.980386483023567, "learning_rate": 9.251043283171777e-06, "loss": 18.3758, "step": 11007 }, { "epoch": 0.2012173944833385, "grad_norm": 7.526697060476995, "learning_rate": 9.250887441674345e-06, "loss": 18.1684, "step": 11008 }, { "epoch": 0.20123567367978504, "grad_norm": 7.133275257449116, "learning_rate": 9.250731585277924e-06, "loss": 17.7657, "step": 11009 }, { "epoch": 0.20125395287623157, "grad_norm": 7.324366944288287, "learning_rate": 9.250575713983056e-06, "loss": 17.7502, "step": 11010 }, { "epoch": 0.20127223207267808, "grad_norm": 5.644457924810181, "learning_rate": 9.250419827790287e-06, "loss": 17.3969, "step": 11011 }, { "epoch": 0.20129051126912462, "grad_norm": 7.12453096673587, "learning_rate": 9.250263926700168e-06, "loss": 17.6701, "step": 11012 }, { "epoch": 0.20130879046557112, "grad_norm": 7.7097577919955365, "learning_rate": 9.25010801071324e-06, "loss": 17.8423, "step": 11013 }, { "epoch": 0.20132706966201766, "grad_norm": 7.522354809228273, "learning_rate": 9.249952079830055e-06, "loss": 17.8738, "step": 11014 }, { "epoch": 0.20134534885846417, "grad_norm": 7.91721991854234, "learning_rate": 9.249796134051156e-06, "loss": 18.1216, "step": 11015 }, { "epoch": 0.2013636280549107, "grad_norm": 7.953338394016145, "learning_rate": 9.24964017337709e-06, "loss": 18.6414, "step": 11016 }, { "epoch": 0.20138190725135724, "grad_norm": 5.907648607896258, "learning_rate": 9.249484197808405e-06, "loss": 17.4272, "step": 11017 }, { "epoch": 0.20140018644780375, "grad_norm": 7.452040030432068, "learning_rate": 9.249328207345645e-06, "loss": 18.0281, "step": 11018 }, { "epoch": 0.20141846564425028, "grad_norm": 12.690281287890302, "learning_rate": 9.24917220198936e-06, "loss": 17.2009, "step": 11019 }, { "epoch": 0.2014367448406968, "grad_norm": 5.707084774368487, "learning_rate": 9.249016181740093e-06, "loss": 17.3794, "step": 11020 }, { "epoch": 0.20145502403714333, "grad_norm": 7.7033771874914425, "learning_rate": 9.248860146598395e-06, "loss": 18.2721, "step": 11021 }, { "epoch": 0.20147330323358986, "grad_norm": 7.073375834259554, "learning_rate": 9.24870409656481e-06, "loss": 17.5917, "step": 11022 }, { "epoch": 0.20149158243003637, "grad_norm": 5.304524765367442, "learning_rate": 9.248548031639885e-06, "loss": 17.1065, "step": 11023 }, { "epoch": 0.2015098616264829, "grad_norm": 8.004742125968127, "learning_rate": 9.24839195182417e-06, "loss": 18.5089, "step": 11024 }, { "epoch": 0.2015281408229294, "grad_norm": 7.681500562120354, "learning_rate": 9.248235857118209e-06, "loss": 17.8945, "step": 11025 }, { "epoch": 0.20154642001937595, "grad_norm": 6.671975679551059, "learning_rate": 9.248079747522549e-06, "loss": 17.4852, "step": 11026 }, { "epoch": 0.20156469921582248, "grad_norm": 6.981758276056119, "learning_rate": 9.247923623037739e-06, "loss": 18.1079, "step": 11027 }, { "epoch": 0.201582978412269, "grad_norm": 8.387298847381826, "learning_rate": 9.247767483664325e-06, "loss": 18.0783, "step": 11028 }, { "epoch": 0.20160125760871553, "grad_norm": 5.622659635224309, "learning_rate": 9.247611329402854e-06, "loss": 17.0959, "step": 11029 }, { "epoch": 0.20161953680516204, "grad_norm": 6.730399075504475, "learning_rate": 9.247455160253874e-06, "loss": 17.6984, "step": 11030 }, { "epoch": 0.20163781600160857, "grad_norm": 6.899416727690391, "learning_rate": 9.247298976217933e-06, "loss": 17.6986, "step": 11031 }, { "epoch": 0.20165609519805508, "grad_norm": 6.0520197184089595, "learning_rate": 9.247142777295578e-06, "loss": 17.3721, "step": 11032 }, { "epoch": 0.20167437439450162, "grad_norm": 6.393644475255894, "learning_rate": 9.246986563487355e-06, "loss": 17.461, "step": 11033 }, { "epoch": 0.20169265359094815, "grad_norm": 6.182542631902963, "learning_rate": 9.246830334793812e-06, "loss": 17.4043, "step": 11034 }, { "epoch": 0.20171093278739466, "grad_norm": 7.691680825883224, "learning_rate": 9.246674091215499e-06, "loss": 17.9927, "step": 11035 }, { "epoch": 0.2017292119838412, "grad_norm": 5.994311075954899, "learning_rate": 9.246517832752961e-06, "loss": 17.284, "step": 11036 }, { "epoch": 0.2017474911802877, "grad_norm": 5.222483291875188, "learning_rate": 9.246361559406747e-06, "loss": 16.9912, "step": 11037 }, { "epoch": 0.20176577037673424, "grad_norm": 6.857095782740968, "learning_rate": 9.246205271177405e-06, "loss": 17.9019, "step": 11038 }, { "epoch": 0.20178404957318077, "grad_norm": 6.771680986195066, "learning_rate": 9.246048968065479e-06, "loss": 17.588, "step": 11039 }, { "epoch": 0.20180232876962728, "grad_norm": 6.828068076002624, "learning_rate": 9.245892650071521e-06, "loss": 17.5896, "step": 11040 }, { "epoch": 0.20182060796607382, "grad_norm": 18.483989187270236, "learning_rate": 9.245736317196079e-06, "loss": 17.829, "step": 11041 }, { "epoch": 0.20183888716252033, "grad_norm": 6.282962753229796, "learning_rate": 9.2455799694397e-06, "loss": 17.3659, "step": 11042 }, { "epoch": 0.20185716635896686, "grad_norm": 6.50966517020041, "learning_rate": 9.24542360680293e-06, "loss": 17.7501, "step": 11043 }, { "epoch": 0.2018754455554134, "grad_norm": 6.407674126181323, "learning_rate": 9.245267229286319e-06, "loss": 17.4041, "step": 11044 }, { "epoch": 0.2018937247518599, "grad_norm": 6.749213381678077, "learning_rate": 9.245110836890415e-06, "loss": 17.6395, "step": 11045 }, { "epoch": 0.20191200394830644, "grad_norm": 6.644572490671224, "learning_rate": 9.244954429615766e-06, "loss": 17.4293, "step": 11046 }, { "epoch": 0.20193028314475295, "grad_norm": 6.201464127877089, "learning_rate": 9.244798007462919e-06, "loss": 17.2814, "step": 11047 }, { "epoch": 0.20194856234119948, "grad_norm": 6.999797919378978, "learning_rate": 9.244641570432426e-06, "loss": 17.9026, "step": 11048 }, { "epoch": 0.201966841537646, "grad_norm": 8.051882457201522, "learning_rate": 9.24448511852483e-06, "loss": 18.3227, "step": 11049 }, { "epoch": 0.20198512073409253, "grad_norm": 7.266519999310683, "learning_rate": 9.244328651740684e-06, "loss": 17.7956, "step": 11050 }, { "epoch": 0.20200339993053906, "grad_norm": 8.556487865262474, "learning_rate": 9.244172170080532e-06, "loss": 18.994, "step": 11051 }, { "epoch": 0.20202167912698557, "grad_norm": 6.118251722258119, "learning_rate": 9.244015673544925e-06, "loss": 17.5535, "step": 11052 }, { "epoch": 0.2020399583234321, "grad_norm": 7.353448160888481, "learning_rate": 9.243859162134414e-06, "loss": 17.6268, "step": 11053 }, { "epoch": 0.20205823751987861, "grad_norm": 6.1185326123313315, "learning_rate": 9.243702635849542e-06, "loss": 17.36, "step": 11054 }, { "epoch": 0.20207651671632515, "grad_norm": 6.173621161907901, "learning_rate": 9.243546094690863e-06, "loss": 17.5314, "step": 11055 }, { "epoch": 0.20209479591277169, "grad_norm": 7.005533150540325, "learning_rate": 9.243389538658922e-06, "loss": 17.8785, "step": 11056 }, { "epoch": 0.2021130751092182, "grad_norm": 8.048076185664863, "learning_rate": 9.243232967754269e-06, "loss": 18.4834, "step": 11057 }, { "epoch": 0.20213135430566473, "grad_norm": 5.875936160734458, "learning_rate": 9.243076381977453e-06, "loss": 16.9904, "step": 11058 }, { "epoch": 0.20214963350211124, "grad_norm": 6.812158374551983, "learning_rate": 9.242919781329021e-06, "loss": 17.4696, "step": 11059 }, { "epoch": 0.20216791269855777, "grad_norm": 6.78174248427758, "learning_rate": 9.242763165809525e-06, "loss": 17.8872, "step": 11060 }, { "epoch": 0.2021861918950043, "grad_norm": 6.690291000638457, "learning_rate": 9.24260653541951e-06, "loss": 17.7383, "step": 11061 }, { "epoch": 0.20220447109145082, "grad_norm": 7.568173641090415, "learning_rate": 9.24244989015953e-06, "loss": 17.9104, "step": 11062 }, { "epoch": 0.20222275028789735, "grad_norm": 5.596928581622695, "learning_rate": 9.24229323003013e-06, "loss": 17.2972, "step": 11063 }, { "epoch": 0.20224102948434386, "grad_norm": 6.015883396400564, "learning_rate": 9.242136555031862e-06, "loss": 17.4608, "step": 11064 }, { "epoch": 0.2022593086807904, "grad_norm": 6.328501309808907, "learning_rate": 9.241979865165271e-06, "loss": 17.1676, "step": 11065 }, { "epoch": 0.2022775878772369, "grad_norm": 7.222603573510737, "learning_rate": 9.24182316043091e-06, "loss": 17.8221, "step": 11066 }, { "epoch": 0.20229586707368344, "grad_norm": 7.106749197423903, "learning_rate": 9.241666440829326e-06, "loss": 18.1133, "step": 11067 }, { "epoch": 0.20231414627012997, "grad_norm": 5.983886671823255, "learning_rate": 9.241509706361072e-06, "loss": 17.2629, "step": 11068 }, { "epoch": 0.20233242546657648, "grad_norm": 7.179488341236794, "learning_rate": 9.24135295702669e-06, "loss": 17.8632, "step": 11069 }, { "epoch": 0.20235070466302302, "grad_norm": 6.524423525191377, "learning_rate": 9.241196192826738e-06, "loss": 17.7955, "step": 11070 }, { "epoch": 0.20236898385946953, "grad_norm": 6.264568453092564, "learning_rate": 9.24103941376176e-06, "loss": 17.2563, "step": 11071 }, { "epoch": 0.20238726305591606, "grad_norm": 8.58333432459693, "learning_rate": 9.240882619832306e-06, "loss": 18.3223, "step": 11072 }, { "epoch": 0.2024055422523626, "grad_norm": 7.066775398865614, "learning_rate": 9.240725811038927e-06, "loss": 17.8233, "step": 11073 }, { "epoch": 0.2024238214488091, "grad_norm": 6.8026509708611735, "learning_rate": 9.240568987382173e-06, "loss": 17.5153, "step": 11074 }, { "epoch": 0.20244210064525564, "grad_norm": 6.634238330328132, "learning_rate": 9.240412148862591e-06, "loss": 17.5084, "step": 11075 }, { "epoch": 0.20246037984170215, "grad_norm": 8.026273114106043, "learning_rate": 9.240255295480734e-06, "loss": 18.0841, "step": 11076 }, { "epoch": 0.20247865903814868, "grad_norm": 6.690087092426927, "learning_rate": 9.240098427237148e-06, "loss": 17.6633, "step": 11077 }, { "epoch": 0.20249693823459522, "grad_norm": 5.8175423129199775, "learning_rate": 9.239941544132386e-06, "loss": 17.0738, "step": 11078 }, { "epoch": 0.20251521743104173, "grad_norm": 6.414427782891805, "learning_rate": 9.239784646166999e-06, "loss": 17.1785, "step": 11079 }, { "epoch": 0.20253349662748826, "grad_norm": 8.766801582188432, "learning_rate": 9.239627733341531e-06, "loss": 18.1234, "step": 11080 }, { "epoch": 0.20255177582393477, "grad_norm": 7.365357716575713, "learning_rate": 9.239470805656538e-06, "loss": 17.6308, "step": 11081 }, { "epoch": 0.2025700550203813, "grad_norm": 5.164235986921989, "learning_rate": 9.239313863112567e-06, "loss": 16.938, "step": 11082 }, { "epoch": 0.20258833421682781, "grad_norm": 6.453351094731791, "learning_rate": 9.23915690571017e-06, "loss": 17.3139, "step": 11083 }, { "epoch": 0.20260661341327435, "grad_norm": 6.6864016587948, "learning_rate": 9.238999933449894e-06, "loss": 17.4998, "step": 11084 }, { "epoch": 0.20262489260972089, "grad_norm": 6.803955050509063, "learning_rate": 9.238842946332292e-06, "loss": 17.7202, "step": 11085 }, { "epoch": 0.2026431718061674, "grad_norm": 6.7583508701882975, "learning_rate": 9.238685944357913e-06, "loss": 17.5772, "step": 11086 }, { "epoch": 0.20266145100261393, "grad_norm": 7.021391029608066, "learning_rate": 9.238528927527308e-06, "loss": 17.8551, "step": 11087 }, { "epoch": 0.20267973019906044, "grad_norm": 5.875048450870385, "learning_rate": 9.238371895841027e-06, "loss": 17.4093, "step": 11088 }, { "epoch": 0.20269800939550697, "grad_norm": 6.7214107399667835, "learning_rate": 9.23821484929962e-06, "loss": 17.1692, "step": 11089 }, { "epoch": 0.2027162885919535, "grad_norm": 8.274023358814, "learning_rate": 9.238057787903637e-06, "loss": 18.4087, "step": 11090 }, { "epoch": 0.20273456778840002, "grad_norm": 6.979774278339356, "learning_rate": 9.23790071165363e-06, "loss": 17.2612, "step": 11091 }, { "epoch": 0.20275284698484655, "grad_norm": 7.624076237694389, "learning_rate": 9.237743620550148e-06, "loss": 17.7562, "step": 11092 }, { "epoch": 0.20277112618129306, "grad_norm": 7.896095765190013, "learning_rate": 9.237586514593743e-06, "loss": 18.3727, "step": 11093 }, { "epoch": 0.2027894053777396, "grad_norm": 7.956270754415964, "learning_rate": 9.237429393784965e-06, "loss": 18.0114, "step": 11094 }, { "epoch": 0.20280768457418613, "grad_norm": 7.989114623913356, "learning_rate": 9.237272258124365e-06, "loss": 18.1503, "step": 11095 }, { "epoch": 0.20282596377063264, "grad_norm": 7.1258781155501705, "learning_rate": 9.237115107612493e-06, "loss": 17.8629, "step": 11096 }, { "epoch": 0.20284424296707917, "grad_norm": 7.152743907610953, "learning_rate": 9.236957942249902e-06, "loss": 17.8655, "step": 11097 }, { "epoch": 0.20286252216352568, "grad_norm": 7.663056514872933, "learning_rate": 9.23680076203714e-06, "loss": 17.8698, "step": 11098 }, { "epoch": 0.20288080135997222, "grad_norm": 6.719182007292677, "learning_rate": 9.236643566974758e-06, "loss": 17.5382, "step": 11099 }, { "epoch": 0.20289908055641873, "grad_norm": 6.699845952976791, "learning_rate": 9.236486357063307e-06, "loss": 17.601, "step": 11100 }, { "epoch": 0.20291735975286526, "grad_norm": 6.53621108530564, "learning_rate": 9.23632913230334e-06, "loss": 17.469, "step": 11101 }, { "epoch": 0.2029356389493118, "grad_norm": 6.2682005317902645, "learning_rate": 9.236171892695408e-06, "loss": 17.6244, "step": 11102 }, { "epoch": 0.2029539181457583, "grad_norm": 6.7197314798793135, "learning_rate": 9.23601463824006e-06, "loss": 17.5137, "step": 11103 }, { "epoch": 0.20297219734220484, "grad_norm": 6.336006258595372, "learning_rate": 9.23585736893785e-06, "loss": 17.3593, "step": 11104 }, { "epoch": 0.20299047653865135, "grad_norm": 6.137737038681827, "learning_rate": 9.235700084789325e-06, "loss": 17.0645, "step": 11105 }, { "epoch": 0.20300875573509788, "grad_norm": 7.834039642186673, "learning_rate": 9.23554278579504e-06, "loss": 18.0745, "step": 11106 }, { "epoch": 0.20302703493154442, "grad_norm": 6.954668084270081, "learning_rate": 9.235385471955546e-06, "loss": 17.7245, "step": 11107 }, { "epoch": 0.20304531412799093, "grad_norm": 7.721553135183198, "learning_rate": 9.235228143271392e-06, "loss": 17.9186, "step": 11108 }, { "epoch": 0.20306359332443746, "grad_norm": 9.495352412979067, "learning_rate": 9.23507079974313e-06, "loss": 18.7159, "step": 11109 }, { "epoch": 0.20308187252088397, "grad_norm": 5.810202473978871, "learning_rate": 9.234913441371314e-06, "loss": 17.2472, "step": 11110 }, { "epoch": 0.2031001517173305, "grad_norm": 6.442896637173475, "learning_rate": 9.234756068156494e-06, "loss": 17.4719, "step": 11111 }, { "epoch": 0.20311843091377704, "grad_norm": 6.402017573467823, "learning_rate": 9.234598680099222e-06, "loss": 17.6745, "step": 11112 }, { "epoch": 0.20313671011022355, "grad_norm": 5.4501875542114115, "learning_rate": 9.234441277200048e-06, "loss": 17.0627, "step": 11113 }, { "epoch": 0.2031549893066701, "grad_norm": 5.838435571181767, "learning_rate": 9.234283859459525e-06, "loss": 17.2981, "step": 11114 }, { "epoch": 0.2031732685031166, "grad_norm": 7.30484957185883, "learning_rate": 9.234126426878203e-06, "loss": 17.6792, "step": 11115 }, { "epoch": 0.20319154769956313, "grad_norm": 8.679037738220984, "learning_rate": 9.233968979456637e-06, "loss": 18.3425, "step": 11116 }, { "epoch": 0.20320982689600964, "grad_norm": 6.49102380204587, "learning_rate": 9.233811517195378e-06, "loss": 17.5286, "step": 11117 }, { "epoch": 0.20322810609245617, "grad_norm": 6.991123610421662, "learning_rate": 9.233654040094976e-06, "loss": 17.8174, "step": 11118 }, { "epoch": 0.2032463852889027, "grad_norm": 6.168237386342085, "learning_rate": 9.233496548155984e-06, "loss": 17.4737, "step": 11119 }, { "epoch": 0.20326466448534922, "grad_norm": 6.4433712741488485, "learning_rate": 9.233339041378952e-06, "loss": 17.4473, "step": 11120 }, { "epoch": 0.20328294368179575, "grad_norm": 5.891067840361349, "learning_rate": 9.233181519764437e-06, "loss": 17.2874, "step": 11121 }, { "epoch": 0.20330122287824226, "grad_norm": 8.143132742497487, "learning_rate": 9.233023983312987e-06, "loss": 18.0829, "step": 11122 }, { "epoch": 0.2033195020746888, "grad_norm": 8.487263957275987, "learning_rate": 9.232866432025156e-06, "loss": 18.3101, "step": 11123 }, { "epoch": 0.20333778127113533, "grad_norm": 6.394992361882281, "learning_rate": 9.232708865901495e-06, "loss": 17.3551, "step": 11124 }, { "epoch": 0.20335606046758184, "grad_norm": 6.575868690305045, "learning_rate": 9.232551284942554e-06, "loss": 17.5616, "step": 11125 }, { "epoch": 0.20337433966402838, "grad_norm": 5.0399235075632856, "learning_rate": 9.23239368914889e-06, "loss": 16.9346, "step": 11126 }, { "epoch": 0.20339261886047488, "grad_norm": 9.2722299744395, "learning_rate": 9.232236078521055e-06, "loss": 18.2814, "step": 11127 }, { "epoch": 0.20341089805692142, "grad_norm": 6.8905819906211585, "learning_rate": 9.232078453059598e-06, "loss": 17.5832, "step": 11128 }, { "epoch": 0.20342917725336795, "grad_norm": 6.132241847838988, "learning_rate": 9.231920812765074e-06, "loss": 17.467, "step": 11129 }, { "epoch": 0.20344745644981446, "grad_norm": 6.379810305024003, "learning_rate": 9.231763157638036e-06, "loss": 17.4435, "step": 11130 }, { "epoch": 0.203465735646261, "grad_norm": 5.808808655742433, "learning_rate": 9.231605487679033e-06, "loss": 17.2186, "step": 11131 }, { "epoch": 0.2034840148427075, "grad_norm": 6.0394033521351345, "learning_rate": 9.23144780288862e-06, "loss": 17.122, "step": 11132 }, { "epoch": 0.20350229403915404, "grad_norm": 8.642312035088267, "learning_rate": 9.231290103267352e-06, "loss": 18.5249, "step": 11133 }, { "epoch": 0.20352057323560055, "grad_norm": 6.4284436306351385, "learning_rate": 9.231132388815778e-06, "loss": 17.4943, "step": 11134 }, { "epoch": 0.20353885243204708, "grad_norm": 8.370086693427135, "learning_rate": 9.230974659534451e-06, "loss": 18.1642, "step": 11135 }, { "epoch": 0.20355713162849362, "grad_norm": 8.025758280170024, "learning_rate": 9.230816915423928e-06, "loss": 18.0183, "step": 11136 }, { "epoch": 0.20357541082494013, "grad_norm": 6.295715707343801, "learning_rate": 9.230659156484755e-06, "loss": 17.3867, "step": 11137 }, { "epoch": 0.20359369002138666, "grad_norm": 7.556217004668174, "learning_rate": 9.230501382717492e-06, "loss": 17.8981, "step": 11138 }, { "epoch": 0.20361196921783317, "grad_norm": 5.740317766351861, "learning_rate": 9.230343594122687e-06, "loss": 17.3958, "step": 11139 }, { "epoch": 0.2036302484142797, "grad_norm": 6.944209250198379, "learning_rate": 9.230185790700895e-06, "loss": 17.7332, "step": 11140 }, { "epoch": 0.20364852761072624, "grad_norm": 6.564848811373634, "learning_rate": 9.230027972452669e-06, "loss": 17.4095, "step": 11141 }, { "epoch": 0.20366680680717275, "grad_norm": 6.854415444915705, "learning_rate": 9.229870139378562e-06, "loss": 17.5654, "step": 11142 }, { "epoch": 0.2036850860036193, "grad_norm": 7.647143387274446, "learning_rate": 9.229712291479128e-06, "loss": 18.1861, "step": 11143 }, { "epoch": 0.2037033652000658, "grad_norm": 6.264718415869328, "learning_rate": 9.229554428754918e-06, "loss": 17.5786, "step": 11144 }, { "epoch": 0.20372164439651233, "grad_norm": 7.050400247635081, "learning_rate": 9.229396551206488e-06, "loss": 17.6802, "step": 11145 }, { "epoch": 0.20373992359295887, "grad_norm": 7.013656808967406, "learning_rate": 9.22923865883439e-06, "loss": 17.7487, "step": 11146 }, { "epoch": 0.20375820278940537, "grad_norm": 7.176620897988137, "learning_rate": 9.229080751639177e-06, "loss": 17.7222, "step": 11147 }, { "epoch": 0.2037764819858519, "grad_norm": 7.104754563617455, "learning_rate": 9.228922829621403e-06, "loss": 17.6496, "step": 11148 }, { "epoch": 0.20379476118229842, "grad_norm": 7.098889295955707, "learning_rate": 9.228764892781622e-06, "loss": 18.0888, "step": 11149 }, { "epoch": 0.20381304037874495, "grad_norm": 8.365287733535423, "learning_rate": 9.228606941120386e-06, "loss": 18.5649, "step": 11150 }, { "epoch": 0.20383131957519146, "grad_norm": 7.066472823936408, "learning_rate": 9.228448974638252e-06, "loss": 17.8336, "step": 11151 }, { "epoch": 0.203849598771638, "grad_norm": 7.190799257549998, "learning_rate": 9.228290993335768e-06, "loss": 17.4468, "step": 11152 }, { "epoch": 0.20386787796808453, "grad_norm": 7.424093952787657, "learning_rate": 9.228132997213493e-06, "loss": 17.9261, "step": 11153 }, { "epoch": 0.20388615716453104, "grad_norm": 6.104952908897735, "learning_rate": 9.227974986271976e-06, "loss": 17.3901, "step": 11154 }, { "epoch": 0.20390443636097758, "grad_norm": 5.532896939303763, "learning_rate": 9.227816960511778e-06, "loss": 17.4417, "step": 11155 }, { "epoch": 0.20392271555742408, "grad_norm": 6.510258284011305, "learning_rate": 9.227658919933446e-06, "loss": 17.5093, "step": 11156 }, { "epoch": 0.20394099475387062, "grad_norm": 6.876360849657799, "learning_rate": 9.227500864537536e-06, "loss": 17.7952, "step": 11157 }, { "epoch": 0.20395927395031715, "grad_norm": 5.942607970633072, "learning_rate": 9.227342794324603e-06, "loss": 17.3487, "step": 11158 }, { "epoch": 0.20397755314676366, "grad_norm": 7.574700009519517, "learning_rate": 9.2271847092952e-06, "loss": 18.1533, "step": 11159 }, { "epoch": 0.2039958323432102, "grad_norm": 6.682538345208364, "learning_rate": 9.227026609449881e-06, "loss": 17.4988, "step": 11160 }, { "epoch": 0.2040141115396567, "grad_norm": 7.752221844167152, "learning_rate": 9.226868494789203e-06, "loss": 18.0337, "step": 11161 }, { "epoch": 0.20403239073610324, "grad_norm": 6.064331059840321, "learning_rate": 9.226710365313714e-06, "loss": 17.4478, "step": 11162 }, { "epoch": 0.20405066993254978, "grad_norm": 6.199532295651936, "learning_rate": 9.226552221023974e-06, "loss": 17.4396, "step": 11163 }, { "epoch": 0.20406894912899629, "grad_norm": 7.424588487200107, "learning_rate": 9.226394061920537e-06, "loss": 17.9378, "step": 11164 }, { "epoch": 0.20408722832544282, "grad_norm": 7.034173364726601, "learning_rate": 9.226235888003952e-06, "loss": 17.9955, "step": 11165 }, { "epoch": 0.20410550752188933, "grad_norm": 6.187052812857753, "learning_rate": 9.226077699274778e-06, "loss": 17.395, "step": 11166 }, { "epoch": 0.20412378671833586, "grad_norm": 6.251115632206645, "learning_rate": 9.225919495733569e-06, "loss": 17.6024, "step": 11167 }, { "epoch": 0.20414206591478237, "grad_norm": 7.201508065677238, "learning_rate": 9.225761277380878e-06, "loss": 17.7733, "step": 11168 }, { "epoch": 0.2041603451112289, "grad_norm": 6.595517365453235, "learning_rate": 9.225603044217261e-06, "loss": 17.6297, "step": 11169 }, { "epoch": 0.20417862430767544, "grad_norm": 7.720351350143014, "learning_rate": 9.225444796243273e-06, "loss": 17.9286, "step": 11170 }, { "epoch": 0.20419690350412195, "grad_norm": 6.852815619637015, "learning_rate": 9.225286533459468e-06, "loss": 17.5033, "step": 11171 }, { "epoch": 0.2042151827005685, "grad_norm": 7.025534011609405, "learning_rate": 9.225128255866397e-06, "loss": 17.7254, "step": 11172 }, { "epoch": 0.204233461897015, "grad_norm": 8.401970977588938, "learning_rate": 9.224969963464623e-06, "loss": 18.3252, "step": 11173 }, { "epoch": 0.20425174109346153, "grad_norm": 5.386890150228694, "learning_rate": 9.224811656254694e-06, "loss": 17.17, "step": 11174 }, { "epoch": 0.20427002028990807, "grad_norm": 7.185666553147642, "learning_rate": 9.224653334237163e-06, "loss": 18.0141, "step": 11175 }, { "epoch": 0.20428829948635457, "grad_norm": 7.066949308004852, "learning_rate": 9.224494997412593e-06, "loss": 17.7945, "step": 11176 }, { "epoch": 0.2043065786828011, "grad_norm": 7.613231902013903, "learning_rate": 9.224336645781533e-06, "loss": 17.8731, "step": 11177 }, { "epoch": 0.20432485787924762, "grad_norm": 6.427204203579496, "learning_rate": 9.22417827934454e-06, "loss": 17.5776, "step": 11178 }, { "epoch": 0.20434313707569415, "grad_norm": 8.003971223075348, "learning_rate": 9.224019898102168e-06, "loss": 17.8197, "step": 11179 }, { "epoch": 0.2043614162721407, "grad_norm": 5.5872193885185455, "learning_rate": 9.223861502054974e-06, "loss": 17.19, "step": 11180 }, { "epoch": 0.2043796954685872, "grad_norm": 6.276657648988144, "learning_rate": 9.223703091203511e-06, "loss": 17.3117, "step": 11181 }, { "epoch": 0.20439797466503373, "grad_norm": 6.897398207329288, "learning_rate": 9.223544665548337e-06, "loss": 17.6208, "step": 11182 }, { "epoch": 0.20441625386148024, "grad_norm": 7.3221433396788305, "learning_rate": 9.223386225090002e-06, "loss": 17.4744, "step": 11183 }, { "epoch": 0.20443453305792678, "grad_norm": 6.278377547536221, "learning_rate": 9.223227769829068e-06, "loss": 17.3664, "step": 11184 }, { "epoch": 0.20445281225437328, "grad_norm": 7.80024806356108, "learning_rate": 9.223069299766085e-06, "loss": 18.1642, "step": 11185 }, { "epoch": 0.20447109145081982, "grad_norm": 7.538758445830707, "learning_rate": 9.222910814901611e-06, "loss": 17.8844, "step": 11186 }, { "epoch": 0.20448937064726636, "grad_norm": 6.2320072295754825, "learning_rate": 9.222752315236203e-06, "loss": 16.996, "step": 11187 }, { "epoch": 0.20450764984371286, "grad_norm": 5.768389402983314, "learning_rate": 9.222593800770411e-06, "loss": 17.2548, "step": 11188 }, { "epoch": 0.2045259290401594, "grad_norm": 9.386362018993129, "learning_rate": 9.222435271504797e-06, "loss": 18.8809, "step": 11189 }, { "epoch": 0.2045442082366059, "grad_norm": 6.2184328619791085, "learning_rate": 9.222276727439914e-06, "loss": 17.5199, "step": 11190 }, { "epoch": 0.20456248743305244, "grad_norm": 7.577401563324187, "learning_rate": 9.222118168576316e-06, "loss": 17.8251, "step": 11191 }, { "epoch": 0.20458076662949898, "grad_norm": 7.8018276763599195, "learning_rate": 9.22195959491456e-06, "loss": 17.7496, "step": 11192 }, { "epoch": 0.20459904582594549, "grad_norm": 7.7073048193379465, "learning_rate": 9.221801006455204e-06, "loss": 18.5523, "step": 11193 }, { "epoch": 0.20461732502239202, "grad_norm": 7.672101512759613, "learning_rate": 9.2216424031988e-06, "loss": 17.8634, "step": 11194 }, { "epoch": 0.20463560421883853, "grad_norm": 7.427337842379527, "learning_rate": 9.221483785145906e-06, "loss": 17.9564, "step": 11195 }, { "epoch": 0.20465388341528506, "grad_norm": 6.569593973534207, "learning_rate": 9.221325152297079e-06, "loss": 17.6455, "step": 11196 }, { "epoch": 0.2046721626117316, "grad_norm": 8.427328028996476, "learning_rate": 9.221166504652871e-06, "loss": 18.462, "step": 11197 }, { "epoch": 0.2046904418081781, "grad_norm": 6.744904946082712, "learning_rate": 9.221007842213843e-06, "loss": 17.515, "step": 11198 }, { "epoch": 0.20470872100462464, "grad_norm": 8.33352714628349, "learning_rate": 9.220849164980548e-06, "loss": 18.5856, "step": 11199 }, { "epoch": 0.20472700020107115, "grad_norm": 7.956233550220343, "learning_rate": 9.220690472953542e-06, "loss": 18.1, "step": 11200 }, { "epoch": 0.2047452793975177, "grad_norm": 7.034235858966999, "learning_rate": 9.220531766133383e-06, "loss": 17.6427, "step": 11201 }, { "epoch": 0.2047635585939642, "grad_norm": 7.374752028705149, "learning_rate": 9.220373044520628e-06, "loss": 17.513, "step": 11202 }, { "epoch": 0.20478183779041073, "grad_norm": 7.120915378950373, "learning_rate": 9.22021430811583e-06, "loss": 17.5448, "step": 11203 }, { "epoch": 0.20480011698685727, "grad_norm": 5.5208955331425065, "learning_rate": 9.220055556919547e-06, "loss": 17.1941, "step": 11204 }, { "epoch": 0.20481839618330377, "grad_norm": 7.252932148688536, "learning_rate": 9.219896790932334e-06, "loss": 17.8942, "step": 11205 }, { "epoch": 0.2048366753797503, "grad_norm": 6.855217393125561, "learning_rate": 9.219738010154753e-06, "loss": 17.6032, "step": 11206 }, { "epoch": 0.20485495457619682, "grad_norm": 7.905595712034856, "learning_rate": 9.219579214587354e-06, "loss": 18.0844, "step": 11207 }, { "epoch": 0.20487323377264335, "grad_norm": 7.305491779279216, "learning_rate": 9.219420404230694e-06, "loss": 17.8717, "step": 11208 }, { "epoch": 0.2048915129690899, "grad_norm": 7.109208649456086, "learning_rate": 9.219261579085335e-06, "loss": 17.9267, "step": 11209 }, { "epoch": 0.2049097921655364, "grad_norm": 6.880451246231312, "learning_rate": 9.21910273915183e-06, "loss": 17.6225, "step": 11210 }, { "epoch": 0.20492807136198293, "grad_norm": 7.476702279942833, "learning_rate": 9.218943884430733e-06, "loss": 18.0985, "step": 11211 }, { "epoch": 0.20494635055842944, "grad_norm": 6.738653096375818, "learning_rate": 9.218785014922606e-06, "loss": 17.3817, "step": 11212 }, { "epoch": 0.20496462975487598, "grad_norm": 6.053858809942252, "learning_rate": 9.218626130628003e-06, "loss": 17.3523, "step": 11213 }, { "epoch": 0.2049829089513225, "grad_norm": 7.031194283853823, "learning_rate": 9.218467231547482e-06, "loss": 17.9715, "step": 11214 }, { "epoch": 0.20500118814776902, "grad_norm": 7.617933246938375, "learning_rate": 9.2183083176816e-06, "loss": 18.2486, "step": 11215 }, { "epoch": 0.20501946734421556, "grad_norm": 7.016914651326241, "learning_rate": 9.218149389030913e-06, "loss": 17.7558, "step": 11216 }, { "epoch": 0.20503774654066206, "grad_norm": 6.314429775341909, "learning_rate": 9.21799044559598e-06, "loss": 17.4803, "step": 11217 }, { "epoch": 0.2050560257371086, "grad_norm": 6.298121876385616, "learning_rate": 9.217831487377354e-06, "loss": 17.2778, "step": 11218 }, { "epoch": 0.2050743049335551, "grad_norm": 6.463091502828433, "learning_rate": 9.217672514375594e-06, "loss": 17.6105, "step": 11219 }, { "epoch": 0.20509258413000164, "grad_norm": 7.081118131205817, "learning_rate": 9.217513526591259e-06, "loss": 17.6592, "step": 11220 }, { "epoch": 0.20511086332644818, "grad_norm": 7.573218858127629, "learning_rate": 9.217354524024905e-06, "loss": 17.7936, "step": 11221 }, { "epoch": 0.2051291425228947, "grad_norm": 8.4098199670913, "learning_rate": 9.21719550667709e-06, "loss": 17.9587, "step": 11222 }, { "epoch": 0.20514742171934122, "grad_norm": 6.121847899495171, "learning_rate": 9.21703647454837e-06, "loss": 17.4993, "step": 11223 }, { "epoch": 0.20516570091578773, "grad_norm": 7.413990228255566, "learning_rate": 9.216877427639303e-06, "loss": 17.9504, "step": 11224 }, { "epoch": 0.20518398011223427, "grad_norm": 7.076346612130487, "learning_rate": 9.216718365950448e-06, "loss": 18.0455, "step": 11225 }, { "epoch": 0.2052022593086808, "grad_norm": 8.428479294844449, "learning_rate": 9.21655928948236e-06, "loss": 17.9222, "step": 11226 }, { "epoch": 0.2052205385051273, "grad_norm": 9.552257974022613, "learning_rate": 9.216400198235598e-06, "loss": 18.1117, "step": 11227 }, { "epoch": 0.20523881770157384, "grad_norm": 5.859977160157482, "learning_rate": 9.216241092210718e-06, "loss": 17.3769, "step": 11228 }, { "epoch": 0.20525709689802035, "grad_norm": 6.6096858069978115, "learning_rate": 9.21608197140828e-06, "loss": 17.6359, "step": 11229 }, { "epoch": 0.2052753760944669, "grad_norm": 6.743129044468079, "learning_rate": 9.215922835828839e-06, "loss": 17.7402, "step": 11230 }, { "epoch": 0.20529365529091342, "grad_norm": 7.3244944650637, "learning_rate": 9.215763685472955e-06, "loss": 17.8573, "step": 11231 }, { "epoch": 0.20531193448735993, "grad_norm": 7.177089459925122, "learning_rate": 9.215604520341186e-06, "loss": 17.9606, "step": 11232 }, { "epoch": 0.20533021368380647, "grad_norm": 7.654941124166402, "learning_rate": 9.215445340434088e-06, "loss": 17.8949, "step": 11233 }, { "epoch": 0.20534849288025298, "grad_norm": 7.105827489680923, "learning_rate": 9.215286145752222e-06, "loss": 17.7934, "step": 11234 }, { "epoch": 0.2053667720766995, "grad_norm": 5.612250988367692, "learning_rate": 9.215126936296141e-06, "loss": 17.2452, "step": 11235 }, { "epoch": 0.20538505127314602, "grad_norm": 12.762976536213964, "learning_rate": 9.214967712066408e-06, "loss": 19.0867, "step": 11236 }, { "epoch": 0.20540333046959255, "grad_norm": 5.904836314826313, "learning_rate": 9.214808473063578e-06, "loss": 17.2909, "step": 11237 }, { "epoch": 0.2054216096660391, "grad_norm": 7.576621186342858, "learning_rate": 9.21464921928821e-06, "loss": 17.8573, "step": 11238 }, { "epoch": 0.2054398888624856, "grad_norm": 6.779356927404155, "learning_rate": 9.21448995074086e-06, "loss": 17.5983, "step": 11239 }, { "epoch": 0.20545816805893213, "grad_norm": 6.856019507235172, "learning_rate": 9.214330667422092e-06, "loss": 17.8275, "step": 11240 }, { "epoch": 0.20547644725537864, "grad_norm": 9.397441998703927, "learning_rate": 9.21417136933246e-06, "loss": 18.7988, "step": 11241 }, { "epoch": 0.20549472645182518, "grad_norm": 5.921991480316953, "learning_rate": 9.214012056472521e-06, "loss": 17.259, "step": 11242 }, { "epoch": 0.2055130056482717, "grad_norm": 7.2698720955148675, "learning_rate": 9.213852728842839e-06, "loss": 17.8498, "step": 11243 }, { "epoch": 0.20553128484471822, "grad_norm": 6.651611949906887, "learning_rate": 9.213693386443966e-06, "loss": 17.7188, "step": 11244 }, { "epoch": 0.20554956404116476, "grad_norm": 7.508074955197114, "learning_rate": 9.213534029276464e-06, "loss": 18.2089, "step": 11245 }, { "epoch": 0.20556784323761126, "grad_norm": 8.512802495561843, "learning_rate": 9.21337465734089e-06, "loss": 17.8531, "step": 11246 }, { "epoch": 0.2055861224340578, "grad_norm": 7.357293392894167, "learning_rate": 9.213215270637805e-06, "loss": 17.7409, "step": 11247 }, { "epoch": 0.20560440163050434, "grad_norm": 6.011106680943368, "learning_rate": 9.213055869167767e-06, "loss": 17.2643, "step": 11248 }, { "epoch": 0.20562268082695084, "grad_norm": 6.57061944873097, "learning_rate": 9.212896452931331e-06, "loss": 17.6767, "step": 11249 }, { "epoch": 0.20564096002339738, "grad_norm": 6.287256367807107, "learning_rate": 9.21273702192906e-06, "loss": 17.2238, "step": 11250 }, { "epoch": 0.2056592392198439, "grad_norm": 6.000760785422642, "learning_rate": 9.21257757616151e-06, "loss": 17.226, "step": 11251 }, { "epoch": 0.20567751841629042, "grad_norm": 6.890166883988605, "learning_rate": 9.212418115629243e-06, "loss": 17.8284, "step": 11252 }, { "epoch": 0.20569579761273693, "grad_norm": 7.627504871921874, "learning_rate": 9.212258640332815e-06, "loss": 17.4906, "step": 11253 }, { "epoch": 0.20571407680918347, "grad_norm": 7.70247386428855, "learning_rate": 9.212099150272786e-06, "loss": 18.3294, "step": 11254 }, { "epoch": 0.20573235600563, "grad_norm": 8.44945226014485, "learning_rate": 9.211939645449715e-06, "loss": 18.5711, "step": 11255 }, { "epoch": 0.2057506352020765, "grad_norm": 8.303653254049596, "learning_rate": 9.211780125864162e-06, "loss": 18.3465, "step": 11256 }, { "epoch": 0.20576891439852305, "grad_norm": 5.468438191150138, "learning_rate": 9.211620591516683e-06, "loss": 17.101, "step": 11257 }, { "epoch": 0.20578719359496955, "grad_norm": 7.026778992898313, "learning_rate": 9.211461042407841e-06, "loss": 17.995, "step": 11258 }, { "epoch": 0.2058054727914161, "grad_norm": 7.124654173790567, "learning_rate": 9.211301478538194e-06, "loss": 17.3347, "step": 11259 }, { "epoch": 0.20582375198786262, "grad_norm": 6.822023858200299, "learning_rate": 9.2111418999083e-06, "loss": 17.7613, "step": 11260 }, { "epoch": 0.20584203118430913, "grad_norm": 7.2272941338736905, "learning_rate": 9.210982306518719e-06, "loss": 17.6922, "step": 11261 }, { "epoch": 0.20586031038075567, "grad_norm": 6.602902486894802, "learning_rate": 9.21082269837001e-06, "loss": 17.5514, "step": 11262 }, { "epoch": 0.20587858957720218, "grad_norm": 7.480254531432755, "learning_rate": 9.210663075462733e-06, "loss": 18.2328, "step": 11263 }, { "epoch": 0.2058968687736487, "grad_norm": 6.128790145698104, "learning_rate": 9.210503437797448e-06, "loss": 17.3685, "step": 11264 }, { "epoch": 0.20591514797009525, "grad_norm": 8.293712419676533, "learning_rate": 9.210343785374713e-06, "loss": 18.1156, "step": 11265 }, { "epoch": 0.20593342716654175, "grad_norm": 7.404471374026537, "learning_rate": 9.21018411819509e-06, "loss": 17.6813, "step": 11266 }, { "epoch": 0.2059517063629883, "grad_norm": 6.296386617109181, "learning_rate": 9.210024436259135e-06, "loss": 17.4807, "step": 11267 }, { "epoch": 0.2059699855594348, "grad_norm": 7.517825134964458, "learning_rate": 9.20986473956741e-06, "loss": 17.9497, "step": 11268 }, { "epoch": 0.20598826475588133, "grad_norm": 7.198564620537514, "learning_rate": 9.209705028120475e-06, "loss": 18.061, "step": 11269 }, { "epoch": 0.20600654395232784, "grad_norm": 7.446215235124976, "learning_rate": 9.209545301918889e-06, "loss": 17.8568, "step": 11270 }, { "epoch": 0.20602482314877438, "grad_norm": 6.199927643465172, "learning_rate": 9.209385560963212e-06, "loss": 17.2508, "step": 11271 }, { "epoch": 0.2060431023452209, "grad_norm": 7.165829469897477, "learning_rate": 9.209225805254004e-06, "loss": 17.7853, "step": 11272 }, { "epoch": 0.20606138154166742, "grad_norm": 6.174745654746108, "learning_rate": 9.209066034791824e-06, "loss": 17.3414, "step": 11273 }, { "epoch": 0.20607966073811396, "grad_norm": 6.613461508753073, "learning_rate": 9.208906249577234e-06, "loss": 17.5057, "step": 11274 }, { "epoch": 0.20609793993456046, "grad_norm": 7.359812146534588, "learning_rate": 9.208746449610792e-06, "loss": 17.8326, "step": 11275 }, { "epoch": 0.206116219131007, "grad_norm": 6.829152532704157, "learning_rate": 9.20858663489306e-06, "loss": 17.6674, "step": 11276 }, { "epoch": 0.20613449832745354, "grad_norm": 7.597519963948508, "learning_rate": 9.208426805424596e-06, "loss": 18.0787, "step": 11277 }, { "epoch": 0.20615277752390004, "grad_norm": 6.740113924962893, "learning_rate": 9.208266961205961e-06, "loss": 17.5505, "step": 11278 }, { "epoch": 0.20617105672034658, "grad_norm": 6.659659448971432, "learning_rate": 9.208107102237717e-06, "loss": 17.5001, "step": 11279 }, { "epoch": 0.2061893359167931, "grad_norm": 6.516375911744139, "learning_rate": 9.207947228520421e-06, "loss": 17.4043, "step": 11280 }, { "epoch": 0.20620761511323962, "grad_norm": 7.591304504931219, "learning_rate": 9.207787340054637e-06, "loss": 17.5647, "step": 11281 }, { "epoch": 0.20622589430968616, "grad_norm": 6.431169455060361, "learning_rate": 9.207627436840922e-06, "loss": 17.4243, "step": 11282 }, { "epoch": 0.20624417350613267, "grad_norm": 5.904620637899576, "learning_rate": 9.207467518879838e-06, "loss": 17.2062, "step": 11283 }, { "epoch": 0.2062624527025792, "grad_norm": 6.255924391466615, "learning_rate": 9.207307586171946e-06, "loss": 17.4979, "step": 11284 }, { "epoch": 0.2062807318990257, "grad_norm": 5.775303579231696, "learning_rate": 9.207147638717807e-06, "loss": 17.4686, "step": 11285 }, { "epoch": 0.20629901109547225, "grad_norm": 8.48887826399413, "learning_rate": 9.20698767651798e-06, "loss": 18.7141, "step": 11286 }, { "epoch": 0.20631729029191875, "grad_norm": 5.258576051977037, "learning_rate": 9.206827699573024e-06, "loss": 16.9402, "step": 11287 }, { "epoch": 0.2063355694883653, "grad_norm": 8.300980279165882, "learning_rate": 9.206667707883504e-06, "loss": 18.4599, "step": 11288 }, { "epoch": 0.20635384868481182, "grad_norm": 6.571769161341952, "learning_rate": 9.206507701449978e-06, "loss": 17.6144, "step": 11289 }, { "epoch": 0.20637212788125833, "grad_norm": 7.422547512778995, "learning_rate": 9.206347680273008e-06, "loss": 18.0077, "step": 11290 }, { "epoch": 0.20639040707770487, "grad_norm": 5.961834342625839, "learning_rate": 9.206187644353155e-06, "loss": 17.343, "step": 11291 }, { "epoch": 0.20640868627415138, "grad_norm": 7.77587413166556, "learning_rate": 9.206027593690978e-06, "loss": 18.1333, "step": 11292 }, { "epoch": 0.2064269654705979, "grad_norm": 7.670740089916023, "learning_rate": 9.20586752828704e-06, "loss": 18.0532, "step": 11293 }, { "epoch": 0.20644524466704445, "grad_norm": 6.4864691492117235, "learning_rate": 9.205707448141901e-06, "loss": 17.4666, "step": 11294 }, { "epoch": 0.20646352386349096, "grad_norm": 6.757883265958228, "learning_rate": 9.205547353256123e-06, "loss": 17.739, "step": 11295 }, { "epoch": 0.2064818030599375, "grad_norm": 6.691835969288469, "learning_rate": 9.205387243630267e-06, "loss": 17.8983, "step": 11296 }, { "epoch": 0.206500082256384, "grad_norm": 7.450243470813527, "learning_rate": 9.205227119264892e-06, "loss": 17.7776, "step": 11297 }, { "epoch": 0.20651836145283053, "grad_norm": 7.294062044729192, "learning_rate": 9.205066980160561e-06, "loss": 17.6949, "step": 11298 }, { "epoch": 0.20653664064927707, "grad_norm": 6.869556676245195, "learning_rate": 9.204906826317835e-06, "loss": 17.8619, "step": 11299 }, { "epoch": 0.20655491984572358, "grad_norm": 7.881950895303181, "learning_rate": 9.204746657737276e-06, "loss": 18.0034, "step": 11300 }, { "epoch": 0.2065731990421701, "grad_norm": 6.516121643789768, "learning_rate": 9.204586474419445e-06, "loss": 17.5569, "step": 11301 }, { "epoch": 0.20659147823861662, "grad_norm": 7.121390109024867, "learning_rate": 9.204426276364905e-06, "loss": 17.8313, "step": 11302 }, { "epoch": 0.20660975743506316, "grad_norm": 8.016283913408762, "learning_rate": 9.204266063574212e-06, "loss": 18.2278, "step": 11303 }, { "epoch": 0.20662803663150967, "grad_norm": 6.9818234347910755, "learning_rate": 9.204105836047934e-06, "loss": 17.5215, "step": 11304 }, { "epoch": 0.2066463158279562, "grad_norm": 5.269493716760305, "learning_rate": 9.203945593786628e-06, "loss": 17.0022, "step": 11305 }, { "epoch": 0.20666459502440274, "grad_norm": 6.455078838645331, "learning_rate": 9.20378533679086e-06, "loss": 17.3951, "step": 11306 }, { "epoch": 0.20668287422084924, "grad_norm": 6.523396538436194, "learning_rate": 9.203625065061186e-06, "loss": 17.3609, "step": 11307 }, { "epoch": 0.20670115341729578, "grad_norm": 6.544004948372208, "learning_rate": 9.203464778598173e-06, "loss": 17.4493, "step": 11308 }, { "epoch": 0.2067194326137423, "grad_norm": 6.419709541482147, "learning_rate": 9.203304477402382e-06, "loss": 17.6385, "step": 11309 }, { "epoch": 0.20673771181018882, "grad_norm": 6.332698117626546, "learning_rate": 9.20314416147437e-06, "loss": 17.6083, "step": 11310 }, { "epoch": 0.20675599100663536, "grad_norm": 6.55180966767108, "learning_rate": 9.202983830814704e-06, "loss": 17.3829, "step": 11311 }, { "epoch": 0.20677427020308187, "grad_norm": 6.576574494804952, "learning_rate": 9.202823485423946e-06, "loss": 17.5929, "step": 11312 }, { "epoch": 0.2067925493995284, "grad_norm": 7.869222319228628, "learning_rate": 9.202663125302656e-06, "loss": 17.711, "step": 11313 }, { "epoch": 0.2068108285959749, "grad_norm": 7.220267385438705, "learning_rate": 9.202502750451394e-06, "loss": 17.8717, "step": 11314 }, { "epoch": 0.20682910779242145, "grad_norm": 6.498563801180941, "learning_rate": 9.202342360870726e-06, "loss": 17.4267, "step": 11315 }, { "epoch": 0.20684738698886798, "grad_norm": 8.49745539591033, "learning_rate": 9.202181956561213e-06, "loss": 18.3729, "step": 11316 }, { "epoch": 0.2068656661853145, "grad_norm": 6.113489474513642, "learning_rate": 9.202021537523417e-06, "loss": 17.3899, "step": 11317 }, { "epoch": 0.20688394538176103, "grad_norm": 7.259428603948771, "learning_rate": 9.2018611037579e-06, "loss": 17.6373, "step": 11318 }, { "epoch": 0.20690222457820753, "grad_norm": 6.862792995101118, "learning_rate": 9.201700655265224e-06, "loss": 17.6216, "step": 11319 }, { "epoch": 0.20692050377465407, "grad_norm": 5.896591952112572, "learning_rate": 9.201540192045952e-06, "loss": 17.1226, "step": 11320 }, { "epoch": 0.20693878297110058, "grad_norm": 7.457648774614467, "learning_rate": 9.201379714100647e-06, "loss": 18.2697, "step": 11321 }, { "epoch": 0.2069570621675471, "grad_norm": 6.263572739947989, "learning_rate": 9.201219221429869e-06, "loss": 17.4497, "step": 11322 }, { "epoch": 0.20697534136399365, "grad_norm": 5.976391011517805, "learning_rate": 9.201058714034183e-06, "loss": 17.3546, "step": 11323 }, { "epoch": 0.20699362056044016, "grad_norm": 6.554414667157984, "learning_rate": 9.200898191914152e-06, "loss": 17.4139, "step": 11324 }, { "epoch": 0.2070118997568867, "grad_norm": 5.507861465424304, "learning_rate": 9.200737655070336e-06, "loss": 16.9907, "step": 11325 }, { "epoch": 0.2070301789533332, "grad_norm": 9.746191475372257, "learning_rate": 9.2005771035033e-06, "loss": 18.7296, "step": 11326 }, { "epoch": 0.20704845814977973, "grad_norm": 6.341279694026166, "learning_rate": 9.200416537213604e-06, "loss": 17.1672, "step": 11327 }, { "epoch": 0.20706673734622627, "grad_norm": 6.435856974138208, "learning_rate": 9.200255956201814e-06, "loss": 17.3943, "step": 11328 }, { "epoch": 0.20708501654267278, "grad_norm": 6.875642123729105, "learning_rate": 9.200095360468491e-06, "loss": 17.8153, "step": 11329 }, { "epoch": 0.20710329573911931, "grad_norm": 5.805741798094388, "learning_rate": 9.1999347500142e-06, "loss": 17.2909, "step": 11330 }, { "epoch": 0.20712157493556582, "grad_norm": 6.748350954475548, "learning_rate": 9.199774124839499e-06, "loss": 17.7981, "step": 11331 }, { "epoch": 0.20713985413201236, "grad_norm": 7.744974087535545, "learning_rate": 9.199613484944956e-06, "loss": 17.9069, "step": 11332 }, { "epoch": 0.2071581333284589, "grad_norm": 6.331897098997471, "learning_rate": 9.19945283033113e-06, "loss": 17.4355, "step": 11333 }, { "epoch": 0.2071764125249054, "grad_norm": 9.14905122928649, "learning_rate": 9.19929216099859e-06, "loss": 18.0282, "step": 11334 }, { "epoch": 0.20719469172135194, "grad_norm": 7.462062481892406, "learning_rate": 9.199131476947893e-06, "loss": 17.7042, "step": 11335 }, { "epoch": 0.20721297091779844, "grad_norm": 6.223147064247256, "learning_rate": 9.198970778179605e-06, "loss": 17.1327, "step": 11336 }, { "epoch": 0.20723125011424498, "grad_norm": 5.925743273389555, "learning_rate": 9.198810064694287e-06, "loss": 17.0214, "step": 11337 }, { "epoch": 0.2072495293106915, "grad_norm": 6.9396073366208135, "learning_rate": 9.198649336492506e-06, "loss": 17.5051, "step": 11338 }, { "epoch": 0.20726780850713802, "grad_norm": 6.337173387833642, "learning_rate": 9.198488593574823e-06, "loss": 17.7323, "step": 11339 }, { "epoch": 0.20728608770358456, "grad_norm": 7.55508858779258, "learning_rate": 9.198327835941803e-06, "loss": 17.9832, "step": 11340 }, { "epoch": 0.20730436690003107, "grad_norm": 7.509552391910138, "learning_rate": 9.198167063594006e-06, "loss": 17.8462, "step": 11341 }, { "epoch": 0.2073226460964776, "grad_norm": 6.973246871998054, "learning_rate": 9.198006276531999e-06, "loss": 17.5752, "step": 11342 }, { "epoch": 0.2073409252929241, "grad_norm": 8.300606732955098, "learning_rate": 9.197845474756344e-06, "loss": 17.9324, "step": 11343 }, { "epoch": 0.20735920448937065, "grad_norm": 6.386337974565366, "learning_rate": 9.197684658267606e-06, "loss": 17.2407, "step": 11344 }, { "epoch": 0.20737748368581718, "grad_norm": 6.466076760950484, "learning_rate": 9.197523827066347e-06, "loss": 17.7687, "step": 11345 }, { "epoch": 0.2073957628822637, "grad_norm": 7.731434377724766, "learning_rate": 9.19736298115313e-06, "loss": 17.6565, "step": 11346 }, { "epoch": 0.20741404207871023, "grad_norm": 8.723847987198447, "learning_rate": 9.19720212052852e-06, "loss": 17.8272, "step": 11347 }, { "epoch": 0.20743232127515673, "grad_norm": 7.794014451859341, "learning_rate": 9.197041245193084e-06, "loss": 17.756, "step": 11348 }, { "epoch": 0.20745060047160327, "grad_norm": 6.766738502526163, "learning_rate": 9.19688035514738e-06, "loss": 17.5409, "step": 11349 }, { "epoch": 0.2074688796680498, "grad_norm": 5.770209024310648, "learning_rate": 9.196719450391975e-06, "loss": 17.3905, "step": 11350 }, { "epoch": 0.2074871588644963, "grad_norm": 6.549221562153917, "learning_rate": 9.19655853092743e-06, "loss": 17.5294, "step": 11351 }, { "epoch": 0.20750543806094285, "grad_norm": 6.980172463938054, "learning_rate": 9.196397596754316e-06, "loss": 17.4521, "step": 11352 }, { "epoch": 0.20752371725738936, "grad_norm": 6.60327897428517, "learning_rate": 9.196236647873189e-06, "loss": 17.5934, "step": 11353 }, { "epoch": 0.2075419964538359, "grad_norm": 7.065355057578235, "learning_rate": 9.19607568428462e-06, "loss": 17.8144, "step": 11354 }, { "epoch": 0.2075602756502824, "grad_norm": 6.866725361095603, "learning_rate": 9.195914705989166e-06, "loss": 17.7256, "step": 11355 }, { "epoch": 0.20757855484672894, "grad_norm": 5.844090363108879, "learning_rate": 9.195753712987397e-06, "loss": 17.2677, "step": 11356 }, { "epoch": 0.20759683404317547, "grad_norm": 8.539182934160497, "learning_rate": 9.195592705279876e-06, "loss": 18.7554, "step": 11357 }, { "epoch": 0.20761511323962198, "grad_norm": 6.5348978855057, "learning_rate": 9.195431682867166e-06, "loss": 17.5521, "step": 11358 }, { "epoch": 0.20763339243606851, "grad_norm": 6.803589053570613, "learning_rate": 9.195270645749833e-06, "loss": 17.6202, "step": 11359 }, { "epoch": 0.20765167163251502, "grad_norm": 6.727479910864619, "learning_rate": 9.195109593928438e-06, "loss": 17.6091, "step": 11360 }, { "epoch": 0.20766995082896156, "grad_norm": 6.413173530944886, "learning_rate": 9.19494852740355e-06, "loss": 17.3738, "step": 11361 }, { "epoch": 0.2076882300254081, "grad_norm": 7.601512957017651, "learning_rate": 9.19478744617573e-06, "loss": 17.9366, "step": 11362 }, { "epoch": 0.2077065092218546, "grad_norm": 5.789707525698412, "learning_rate": 9.194626350245546e-06, "loss": 17.1879, "step": 11363 }, { "epoch": 0.20772478841830114, "grad_norm": 5.443213409859194, "learning_rate": 9.19446523961356e-06, "loss": 17.0118, "step": 11364 }, { "epoch": 0.20774306761474765, "grad_norm": 6.413676483015337, "learning_rate": 9.194304114280335e-06, "loss": 17.4193, "step": 11365 }, { "epoch": 0.20776134681119418, "grad_norm": 9.445433958255618, "learning_rate": 9.194142974246441e-06, "loss": 18.1818, "step": 11366 }, { "epoch": 0.20777962600764072, "grad_norm": 7.0577186360297794, "learning_rate": 9.193981819512439e-06, "loss": 17.6962, "step": 11367 }, { "epoch": 0.20779790520408722, "grad_norm": 6.606599937932465, "learning_rate": 9.193820650078893e-06, "loss": 17.3329, "step": 11368 }, { "epoch": 0.20781618440053376, "grad_norm": 7.269147101298209, "learning_rate": 9.19365946594637e-06, "loss": 17.7729, "step": 11369 }, { "epoch": 0.20783446359698027, "grad_norm": 10.434064155259405, "learning_rate": 9.193498267115435e-06, "loss": 17.6019, "step": 11370 }, { "epoch": 0.2078527427934268, "grad_norm": 6.489079456392142, "learning_rate": 9.193337053586654e-06, "loss": 17.5649, "step": 11371 }, { "epoch": 0.2078710219898733, "grad_norm": 5.076335939992683, "learning_rate": 9.19317582536059e-06, "loss": 16.8645, "step": 11372 }, { "epoch": 0.20788930118631985, "grad_norm": 6.619757491576467, "learning_rate": 9.193014582437806e-06, "loss": 17.4425, "step": 11373 }, { "epoch": 0.20790758038276638, "grad_norm": 7.043006390120621, "learning_rate": 9.192853324818873e-06, "loss": 17.7465, "step": 11374 }, { "epoch": 0.2079258595792129, "grad_norm": 7.056339196299807, "learning_rate": 9.192692052504351e-06, "loss": 18.1348, "step": 11375 }, { "epoch": 0.20794413877565943, "grad_norm": 7.062149859477818, "learning_rate": 9.192530765494807e-06, "loss": 17.8394, "step": 11376 }, { "epoch": 0.20796241797210593, "grad_norm": 6.9000246413895026, "learning_rate": 9.192369463790807e-06, "loss": 17.7384, "step": 11377 }, { "epoch": 0.20798069716855247, "grad_norm": 6.717007672062542, "learning_rate": 9.192208147392916e-06, "loss": 17.7328, "step": 11378 }, { "epoch": 0.207998976364999, "grad_norm": 7.64450286934646, "learning_rate": 9.192046816301701e-06, "loss": 18.4562, "step": 11379 }, { "epoch": 0.2080172555614455, "grad_norm": 5.88855172135595, "learning_rate": 9.191885470517724e-06, "loss": 17.2166, "step": 11380 }, { "epoch": 0.20803553475789205, "grad_norm": 5.841888329181857, "learning_rate": 9.191724110041551e-06, "loss": 16.9695, "step": 11381 }, { "epoch": 0.20805381395433856, "grad_norm": 6.762546033378747, "learning_rate": 9.19156273487375e-06, "loss": 17.7965, "step": 11382 }, { "epoch": 0.2080720931507851, "grad_norm": 6.768479023383248, "learning_rate": 9.191401345014886e-06, "loss": 17.5318, "step": 11383 }, { "epoch": 0.20809037234723163, "grad_norm": 6.559398362446401, "learning_rate": 9.191239940465522e-06, "loss": 17.4949, "step": 11384 }, { "epoch": 0.20810865154367814, "grad_norm": 7.3701661115443615, "learning_rate": 9.191078521226226e-06, "loss": 18.0867, "step": 11385 }, { "epoch": 0.20812693074012467, "grad_norm": 5.117281048479846, "learning_rate": 9.190917087297565e-06, "loss": 16.8779, "step": 11386 }, { "epoch": 0.20814520993657118, "grad_norm": 8.126069612112843, "learning_rate": 9.190755638680102e-06, "loss": 18.0336, "step": 11387 }, { "epoch": 0.20816348913301772, "grad_norm": 7.396998678970835, "learning_rate": 9.190594175374406e-06, "loss": 17.7872, "step": 11388 }, { "epoch": 0.20818176832946422, "grad_norm": 7.287776594949756, "learning_rate": 9.19043269738104e-06, "loss": 17.6405, "step": 11389 }, { "epoch": 0.20820004752591076, "grad_norm": 6.668249500613658, "learning_rate": 9.19027120470057e-06, "loss": 17.6275, "step": 11390 }, { "epoch": 0.2082183267223573, "grad_norm": 6.037178264374184, "learning_rate": 9.190109697333565e-06, "loss": 17.2264, "step": 11391 }, { "epoch": 0.2082366059188038, "grad_norm": 6.525459289361489, "learning_rate": 9.189948175280588e-06, "loss": 17.3118, "step": 11392 }, { "epoch": 0.20825488511525034, "grad_norm": 6.459079614435108, "learning_rate": 9.189786638542206e-06, "loss": 17.4513, "step": 11393 }, { "epoch": 0.20827316431169685, "grad_norm": 8.017385972157495, "learning_rate": 9.189625087118985e-06, "loss": 18.3373, "step": 11394 }, { "epoch": 0.20829144350814338, "grad_norm": 7.19975293101108, "learning_rate": 9.189463521011492e-06, "loss": 18.112, "step": 11395 }, { "epoch": 0.20830972270458992, "grad_norm": 7.312861634682219, "learning_rate": 9.189301940220295e-06, "loss": 17.7899, "step": 11396 }, { "epoch": 0.20832800190103642, "grad_norm": 5.8548590381567776, "learning_rate": 9.189140344745954e-06, "loss": 17.3417, "step": 11397 }, { "epoch": 0.20834628109748296, "grad_norm": 7.4346127839201515, "learning_rate": 9.188978734589043e-06, "loss": 17.8877, "step": 11398 }, { "epoch": 0.20836456029392947, "grad_norm": 8.118283307165527, "learning_rate": 9.188817109750124e-06, "loss": 18.2895, "step": 11399 }, { "epoch": 0.208382839490376, "grad_norm": 7.3696099466786436, "learning_rate": 9.188655470229766e-06, "loss": 17.7555, "step": 11400 }, { "epoch": 0.20840111868682254, "grad_norm": 7.903233934029004, "learning_rate": 9.188493816028532e-06, "loss": 18.2937, "step": 11401 }, { "epoch": 0.20841939788326905, "grad_norm": 5.666262011006457, "learning_rate": 9.188332147146991e-06, "loss": 17.3378, "step": 11402 }, { "epoch": 0.20843767707971558, "grad_norm": 7.385682597886673, "learning_rate": 9.188170463585709e-06, "loss": 17.8607, "step": 11403 }, { "epoch": 0.2084559562761621, "grad_norm": 6.771538572618234, "learning_rate": 9.188008765345253e-06, "loss": 17.6437, "step": 11404 }, { "epoch": 0.20847423547260863, "grad_norm": 6.72804536843456, "learning_rate": 9.18784705242619e-06, "loss": 17.7804, "step": 11405 }, { "epoch": 0.20849251466905513, "grad_norm": 7.677137840821924, "learning_rate": 9.187685324829088e-06, "loss": 18.1629, "step": 11406 }, { "epoch": 0.20851079386550167, "grad_norm": 7.704103037260511, "learning_rate": 9.187523582554512e-06, "loss": 17.9746, "step": 11407 }, { "epoch": 0.2085290730619482, "grad_norm": 6.845532859197307, "learning_rate": 9.187361825603027e-06, "loss": 17.6056, "step": 11408 }, { "epoch": 0.2085473522583947, "grad_norm": 7.021336147229711, "learning_rate": 9.187200053975203e-06, "loss": 17.5864, "step": 11409 }, { "epoch": 0.20856563145484125, "grad_norm": 6.675649621210543, "learning_rate": 9.187038267671606e-06, "loss": 17.7085, "step": 11410 }, { "epoch": 0.20858391065128776, "grad_norm": 6.906161304809612, "learning_rate": 9.186876466692805e-06, "loss": 17.7359, "step": 11411 }, { "epoch": 0.2086021898477343, "grad_norm": 7.051825935775604, "learning_rate": 9.186714651039364e-06, "loss": 17.8063, "step": 11412 }, { "epoch": 0.20862046904418083, "grad_norm": 7.323049960219737, "learning_rate": 9.186552820711852e-06, "loss": 18.0081, "step": 11413 }, { "epoch": 0.20863874824062734, "grad_norm": 5.1518009573793115, "learning_rate": 9.186390975710835e-06, "loss": 16.8863, "step": 11414 }, { "epoch": 0.20865702743707387, "grad_norm": 5.688528340186772, "learning_rate": 9.18622911603688e-06, "loss": 16.9123, "step": 11415 }, { "epoch": 0.20867530663352038, "grad_norm": 7.3617259942742646, "learning_rate": 9.186067241690556e-06, "loss": 17.7749, "step": 11416 }, { "epoch": 0.20869358582996692, "grad_norm": 8.028959493961613, "learning_rate": 9.18590535267243e-06, "loss": 17.6508, "step": 11417 }, { "epoch": 0.20871186502641345, "grad_norm": 6.41800121755563, "learning_rate": 9.185743448983068e-06, "loss": 17.5967, "step": 11418 }, { "epoch": 0.20873014422285996, "grad_norm": 6.732489245810147, "learning_rate": 9.18558153062304e-06, "loss": 17.5627, "step": 11419 }, { "epoch": 0.2087484234193065, "grad_norm": 8.23426094625324, "learning_rate": 9.185419597592912e-06, "loss": 18.5474, "step": 11420 }, { "epoch": 0.208766702615753, "grad_norm": 5.885122435227352, "learning_rate": 9.185257649893251e-06, "loss": 17.3825, "step": 11421 }, { "epoch": 0.20878498181219954, "grad_norm": 8.256182545916584, "learning_rate": 9.185095687524625e-06, "loss": 18.2636, "step": 11422 }, { "epoch": 0.20880326100864605, "grad_norm": 6.254738320064321, "learning_rate": 9.184933710487602e-06, "loss": 17.4135, "step": 11423 }, { "epoch": 0.20882154020509258, "grad_norm": 9.116702380571455, "learning_rate": 9.184771718782748e-06, "loss": 17.8799, "step": 11424 }, { "epoch": 0.20883981940153912, "grad_norm": 7.7365396336510095, "learning_rate": 9.184609712410633e-06, "loss": 18.1285, "step": 11425 }, { "epoch": 0.20885809859798563, "grad_norm": 7.124670689954153, "learning_rate": 9.184447691371825e-06, "loss": 17.692, "step": 11426 }, { "epoch": 0.20887637779443216, "grad_norm": 8.189534692613202, "learning_rate": 9.18428565566689e-06, "loss": 18.231, "step": 11427 }, { "epoch": 0.20889465699087867, "grad_norm": 6.6969344705123595, "learning_rate": 9.184123605296397e-06, "loss": 17.6378, "step": 11428 }, { "epoch": 0.2089129361873252, "grad_norm": 7.448276271927157, "learning_rate": 9.183961540260914e-06, "loss": 18.1656, "step": 11429 }, { "epoch": 0.20893121538377174, "grad_norm": 6.750870260961923, "learning_rate": 9.18379946056101e-06, "loss": 17.7285, "step": 11430 }, { "epoch": 0.20894949458021825, "grad_norm": 6.91195418997168, "learning_rate": 9.183637366197252e-06, "loss": 17.4656, "step": 11431 }, { "epoch": 0.20896777377666478, "grad_norm": 7.112439559293752, "learning_rate": 9.183475257170205e-06, "loss": 17.8876, "step": 11432 }, { "epoch": 0.2089860529731113, "grad_norm": 7.193365368428257, "learning_rate": 9.183313133480442e-06, "loss": 17.7278, "step": 11433 }, { "epoch": 0.20900433216955783, "grad_norm": 6.876107552977072, "learning_rate": 9.18315099512853e-06, "loss": 17.8101, "step": 11434 }, { "epoch": 0.20902261136600436, "grad_norm": 8.549983313567564, "learning_rate": 9.182988842115037e-06, "loss": 18.7058, "step": 11435 }, { "epoch": 0.20904089056245087, "grad_norm": 9.235386252918953, "learning_rate": 9.18282667444053e-06, "loss": 18.5952, "step": 11436 }, { "epoch": 0.2090591697588974, "grad_norm": 7.496746496783825, "learning_rate": 9.182664492105579e-06, "loss": 17.7112, "step": 11437 }, { "epoch": 0.20907744895534391, "grad_norm": 8.29534400602403, "learning_rate": 9.182502295110754e-06, "loss": 17.9013, "step": 11438 }, { "epoch": 0.20909572815179045, "grad_norm": 7.806434686132032, "learning_rate": 9.182340083456618e-06, "loss": 17.938, "step": 11439 }, { "epoch": 0.20911400734823696, "grad_norm": 7.688533630590921, "learning_rate": 9.182177857143744e-06, "loss": 18.0619, "step": 11440 }, { "epoch": 0.2091322865446835, "grad_norm": 7.974976010537834, "learning_rate": 9.1820156161727e-06, "loss": 18.0908, "step": 11441 }, { "epoch": 0.20915056574113003, "grad_norm": 8.852588540338171, "learning_rate": 9.181853360544054e-06, "loss": 18.5536, "step": 11442 }, { "epoch": 0.20916884493757654, "grad_norm": 5.238748007782483, "learning_rate": 9.181691090258375e-06, "loss": 16.9777, "step": 11443 }, { "epoch": 0.20918712413402307, "grad_norm": 5.822094027495809, "learning_rate": 9.18152880531623e-06, "loss": 17.2267, "step": 11444 }, { "epoch": 0.20920540333046958, "grad_norm": 8.633433623820073, "learning_rate": 9.181366505718192e-06, "loss": 18.2636, "step": 11445 }, { "epoch": 0.20922368252691612, "grad_norm": 6.403210662031341, "learning_rate": 9.181204191464827e-06, "loss": 17.5143, "step": 11446 }, { "epoch": 0.20924196172336265, "grad_norm": 6.161692954214665, "learning_rate": 9.181041862556703e-06, "loss": 17.2519, "step": 11447 }, { "epoch": 0.20926024091980916, "grad_norm": 6.270337845095842, "learning_rate": 9.18087951899439e-06, "loss": 17.325, "step": 11448 }, { "epoch": 0.2092785201162557, "grad_norm": 6.037258527576589, "learning_rate": 9.180717160778458e-06, "loss": 17.2384, "step": 11449 }, { "epoch": 0.2092967993127022, "grad_norm": 7.532790883811677, "learning_rate": 9.180554787909474e-06, "loss": 17.6423, "step": 11450 }, { "epoch": 0.20931507850914874, "grad_norm": 6.960346383221193, "learning_rate": 9.180392400388008e-06, "loss": 17.9873, "step": 11451 }, { "epoch": 0.20933335770559527, "grad_norm": 6.3281520616882245, "learning_rate": 9.180229998214632e-06, "loss": 17.4254, "step": 11452 }, { "epoch": 0.20935163690204178, "grad_norm": 7.930718464564416, "learning_rate": 9.180067581389908e-06, "loss": 17.9693, "step": 11453 }, { "epoch": 0.20936991609848832, "grad_norm": 7.16065467253636, "learning_rate": 9.179905149914413e-06, "loss": 17.9344, "step": 11454 }, { "epoch": 0.20938819529493483, "grad_norm": 6.461398696890172, "learning_rate": 9.179742703788715e-06, "loss": 17.5761, "step": 11455 }, { "epoch": 0.20940647449138136, "grad_norm": 5.583603477464138, "learning_rate": 9.179580243013378e-06, "loss": 17.2408, "step": 11456 }, { "epoch": 0.20942475368782787, "grad_norm": 8.131423284574632, "learning_rate": 9.179417767588975e-06, "loss": 18.8045, "step": 11457 }, { "epoch": 0.2094430328842744, "grad_norm": 6.570135935211809, "learning_rate": 9.179255277516077e-06, "loss": 17.7531, "step": 11458 }, { "epoch": 0.20946131208072094, "grad_norm": 7.046249102869978, "learning_rate": 9.179092772795253e-06, "loss": 17.7526, "step": 11459 }, { "epoch": 0.20947959127716745, "grad_norm": 8.84210435560813, "learning_rate": 9.17893025342707e-06, "loss": 18.1264, "step": 11460 }, { "epoch": 0.20949787047361398, "grad_norm": 7.013666125292309, "learning_rate": 9.178767719412098e-06, "loss": 17.6529, "step": 11461 }, { "epoch": 0.2095161496700605, "grad_norm": 6.193632295670529, "learning_rate": 9.17860517075091e-06, "loss": 17.5707, "step": 11462 }, { "epoch": 0.20953442886650703, "grad_norm": 6.444396119092666, "learning_rate": 9.17844260744407e-06, "loss": 17.2939, "step": 11463 }, { "epoch": 0.20955270806295356, "grad_norm": 5.333388606505633, "learning_rate": 9.178280029492154e-06, "loss": 16.963, "step": 11464 }, { "epoch": 0.20957098725940007, "grad_norm": 5.560412811342642, "learning_rate": 9.178117436895731e-06, "loss": 17.177, "step": 11465 }, { "epoch": 0.2095892664558466, "grad_norm": 6.115751298206976, "learning_rate": 9.177954829655367e-06, "loss": 17.423, "step": 11466 }, { "epoch": 0.20960754565229311, "grad_norm": 5.775824499667194, "learning_rate": 9.177792207771632e-06, "loss": 17.2843, "step": 11467 }, { "epoch": 0.20962582484873965, "grad_norm": 6.633139118448926, "learning_rate": 9.177629571245099e-06, "loss": 17.6942, "step": 11468 }, { "epoch": 0.20964410404518619, "grad_norm": 6.442523114555858, "learning_rate": 9.177466920076337e-06, "loss": 17.6104, "step": 11469 }, { "epoch": 0.2096623832416327, "grad_norm": 6.9251317768575955, "learning_rate": 9.177304254265918e-06, "loss": 17.7068, "step": 11470 }, { "epoch": 0.20968066243807923, "grad_norm": 7.398398019701001, "learning_rate": 9.177141573814407e-06, "loss": 17.8246, "step": 11471 }, { "epoch": 0.20969894163452574, "grad_norm": 6.424344074639616, "learning_rate": 9.176978878722378e-06, "loss": 17.4117, "step": 11472 }, { "epoch": 0.20971722083097227, "grad_norm": 6.306753861980729, "learning_rate": 9.176816168990402e-06, "loss": 17.2655, "step": 11473 }, { "epoch": 0.20973550002741878, "grad_norm": 6.672184053816812, "learning_rate": 9.176653444619045e-06, "loss": 17.54, "step": 11474 }, { "epoch": 0.20975377922386532, "grad_norm": 5.9405160859744655, "learning_rate": 9.176490705608883e-06, "loss": 17.3811, "step": 11475 }, { "epoch": 0.20977205842031185, "grad_norm": 6.3595404036329795, "learning_rate": 9.176327951960481e-06, "loss": 17.5449, "step": 11476 }, { "epoch": 0.20979033761675836, "grad_norm": 7.935320178925701, "learning_rate": 9.176165183674415e-06, "loss": 18.1486, "step": 11477 }, { "epoch": 0.2098086168132049, "grad_norm": 7.020614202529794, "learning_rate": 9.17600240075125e-06, "loss": 17.6709, "step": 11478 }, { "epoch": 0.2098268960096514, "grad_norm": 6.801796338808055, "learning_rate": 9.175839603191562e-06, "loss": 17.6184, "step": 11479 }, { "epoch": 0.20984517520609794, "grad_norm": 7.41898396181858, "learning_rate": 9.175676790995914e-06, "loss": 18.0923, "step": 11480 }, { "epoch": 0.20986345440254447, "grad_norm": 7.078454204215108, "learning_rate": 9.175513964164884e-06, "loss": 17.5183, "step": 11481 }, { "epoch": 0.20988173359899098, "grad_norm": 7.246881122848099, "learning_rate": 9.175351122699039e-06, "loss": 17.873, "step": 11482 }, { "epoch": 0.20990001279543752, "grad_norm": 7.2882921527209765, "learning_rate": 9.175188266598952e-06, "loss": 18.4119, "step": 11483 }, { "epoch": 0.20991829199188403, "grad_norm": 5.771480647417177, "learning_rate": 9.175025395865191e-06, "loss": 17.2252, "step": 11484 }, { "epoch": 0.20993657118833056, "grad_norm": 5.6758103847942705, "learning_rate": 9.174862510498328e-06, "loss": 17.0851, "step": 11485 }, { "epoch": 0.2099548503847771, "grad_norm": 7.267595105528622, "learning_rate": 9.174699610498935e-06, "loss": 17.7672, "step": 11486 }, { "epoch": 0.2099731295812236, "grad_norm": 7.854747483556572, "learning_rate": 9.174536695867582e-06, "loss": 17.9662, "step": 11487 }, { "epoch": 0.20999140877767014, "grad_norm": 6.087095273285454, "learning_rate": 9.17437376660484e-06, "loss": 17.4715, "step": 11488 }, { "epoch": 0.21000968797411665, "grad_norm": 9.184815723203007, "learning_rate": 9.17421082271128e-06, "loss": 18.5931, "step": 11489 }, { "epoch": 0.21002796717056318, "grad_norm": 6.876450065885871, "learning_rate": 9.174047864187474e-06, "loss": 17.8128, "step": 11490 }, { "epoch": 0.2100462463670097, "grad_norm": 7.182804026312148, "learning_rate": 9.173884891033991e-06, "loss": 18.0019, "step": 11491 }, { "epoch": 0.21006452556345623, "grad_norm": 7.132975071055591, "learning_rate": 9.173721903251406e-06, "loss": 17.7768, "step": 11492 }, { "epoch": 0.21008280475990276, "grad_norm": 7.024857048951854, "learning_rate": 9.173558900840286e-06, "loss": 17.8144, "step": 11493 }, { "epoch": 0.21010108395634927, "grad_norm": 7.681145450321271, "learning_rate": 9.173395883801205e-06, "loss": 17.9513, "step": 11494 }, { "epoch": 0.2101193631527958, "grad_norm": 6.469819414030036, "learning_rate": 9.173232852134733e-06, "loss": 17.4744, "step": 11495 }, { "epoch": 0.21013764234924232, "grad_norm": 6.146947289438684, "learning_rate": 9.173069805841442e-06, "loss": 17.3704, "step": 11496 }, { "epoch": 0.21015592154568885, "grad_norm": 6.551915270906452, "learning_rate": 9.172906744921904e-06, "loss": 17.3509, "step": 11497 }, { "epoch": 0.2101742007421354, "grad_norm": 6.192812102346293, "learning_rate": 9.172743669376691e-06, "loss": 17.6447, "step": 11498 }, { "epoch": 0.2101924799385819, "grad_norm": 6.415154541814359, "learning_rate": 9.172580579206372e-06, "loss": 17.3324, "step": 11499 }, { "epoch": 0.21021075913502843, "grad_norm": 6.722914924792808, "learning_rate": 9.172417474411521e-06, "loss": 17.6114, "step": 11500 }, { "epoch": 0.21022903833147494, "grad_norm": 8.548949703373932, "learning_rate": 9.172254354992707e-06, "loss": 18.1477, "step": 11501 }, { "epoch": 0.21024731752792147, "grad_norm": 5.861101349220818, "learning_rate": 9.172091220950505e-06, "loss": 17.2611, "step": 11502 }, { "epoch": 0.210265596724368, "grad_norm": 5.905902955464844, "learning_rate": 9.171928072285486e-06, "loss": 17.3396, "step": 11503 }, { "epoch": 0.21028387592081452, "grad_norm": 5.915945906752697, "learning_rate": 9.17176490899822e-06, "loss": 17.4707, "step": 11504 }, { "epoch": 0.21030215511726105, "grad_norm": 6.276079061196319, "learning_rate": 9.17160173108928e-06, "loss": 17.4897, "step": 11505 }, { "epoch": 0.21032043431370756, "grad_norm": 6.4612495913653625, "learning_rate": 9.171438538559239e-06, "loss": 17.8194, "step": 11506 }, { "epoch": 0.2103387135101541, "grad_norm": 6.872186108418846, "learning_rate": 9.171275331408667e-06, "loss": 17.6669, "step": 11507 }, { "epoch": 0.2103569927066006, "grad_norm": 5.683670818575571, "learning_rate": 9.171112109638136e-06, "loss": 17.1627, "step": 11508 }, { "epoch": 0.21037527190304714, "grad_norm": 6.323289335138741, "learning_rate": 9.17094887324822e-06, "loss": 17.5417, "step": 11509 }, { "epoch": 0.21039355109949368, "grad_norm": 5.994572405306893, "learning_rate": 9.170785622239491e-06, "loss": 17.4275, "step": 11510 }, { "epoch": 0.21041183029594018, "grad_norm": 6.553108039899262, "learning_rate": 9.17062235661252e-06, "loss": 17.749, "step": 11511 }, { "epoch": 0.21043010949238672, "grad_norm": 6.268290066568144, "learning_rate": 9.170459076367878e-06, "loss": 17.5229, "step": 11512 }, { "epoch": 0.21044838868883323, "grad_norm": 6.242131561088442, "learning_rate": 9.17029578150614e-06, "loss": 17.6085, "step": 11513 }, { "epoch": 0.21046666788527976, "grad_norm": 6.839671989520956, "learning_rate": 9.170132472027876e-06, "loss": 17.7471, "step": 11514 }, { "epoch": 0.2104849470817263, "grad_norm": 6.299417662322813, "learning_rate": 9.169969147933661e-06, "loss": 17.3098, "step": 11515 }, { "epoch": 0.2105032262781728, "grad_norm": 6.664050719356811, "learning_rate": 9.169805809224067e-06, "loss": 17.5669, "step": 11516 }, { "epoch": 0.21052150547461934, "grad_norm": 6.881472460367836, "learning_rate": 9.169642455899664e-06, "loss": 17.6966, "step": 11517 }, { "epoch": 0.21053978467106585, "grad_norm": 7.650325287111541, "learning_rate": 9.169479087961026e-06, "loss": 17.976, "step": 11518 }, { "epoch": 0.21055806386751239, "grad_norm": 6.622241294044782, "learning_rate": 9.169315705408725e-06, "loss": 17.5739, "step": 11519 }, { "epoch": 0.21057634306395892, "grad_norm": 6.596926006218519, "learning_rate": 9.169152308243334e-06, "loss": 17.704, "step": 11520 }, { "epoch": 0.21059462226040543, "grad_norm": 6.179397638603914, "learning_rate": 9.168988896465428e-06, "loss": 17.3994, "step": 11521 }, { "epoch": 0.21061290145685196, "grad_norm": 7.3885773820083935, "learning_rate": 9.168825470075575e-06, "loss": 18.2278, "step": 11522 }, { "epoch": 0.21063118065329847, "grad_norm": 7.24958690158779, "learning_rate": 9.168662029074354e-06, "loss": 17.681, "step": 11523 }, { "epoch": 0.210649459849745, "grad_norm": 7.721728704817233, "learning_rate": 9.168498573462332e-06, "loss": 17.9807, "step": 11524 }, { "epoch": 0.21066773904619152, "grad_norm": 8.601277694577712, "learning_rate": 9.168335103240085e-06, "loss": 17.554, "step": 11525 }, { "epoch": 0.21068601824263805, "grad_norm": 7.818730322223752, "learning_rate": 9.168171618408184e-06, "loss": 17.7242, "step": 11526 }, { "epoch": 0.2107042974390846, "grad_norm": 6.578160810947474, "learning_rate": 9.168008118967205e-06, "loss": 17.8085, "step": 11527 }, { "epoch": 0.2107225766355311, "grad_norm": 7.813451617036049, "learning_rate": 9.167844604917718e-06, "loss": 17.8416, "step": 11528 }, { "epoch": 0.21074085583197763, "grad_norm": 7.586779397627988, "learning_rate": 9.167681076260296e-06, "loss": 18.3751, "step": 11529 }, { "epoch": 0.21075913502842414, "grad_norm": 7.190603490535477, "learning_rate": 9.167517532995516e-06, "loss": 18.267, "step": 11530 }, { "epoch": 0.21077741422487067, "grad_norm": 6.1094351684151835, "learning_rate": 9.167353975123947e-06, "loss": 17.2526, "step": 11531 }, { "epoch": 0.2107956934213172, "grad_norm": 5.802959557639488, "learning_rate": 9.167190402646165e-06, "loss": 17.4219, "step": 11532 }, { "epoch": 0.21081397261776372, "grad_norm": 7.848723997652496, "learning_rate": 9.167026815562742e-06, "loss": 17.758, "step": 11533 }, { "epoch": 0.21083225181421025, "grad_norm": 6.219336050238912, "learning_rate": 9.166863213874251e-06, "loss": 17.2805, "step": 11534 }, { "epoch": 0.21085053101065676, "grad_norm": 6.420279170239606, "learning_rate": 9.166699597581267e-06, "loss": 17.3998, "step": 11535 }, { "epoch": 0.2108688102071033, "grad_norm": 6.206917950445866, "learning_rate": 9.166535966684362e-06, "loss": 17.4474, "step": 11536 }, { "epoch": 0.21088708940354983, "grad_norm": 7.389121348819259, "learning_rate": 9.16637232118411e-06, "loss": 17.8443, "step": 11537 }, { "epoch": 0.21090536859999634, "grad_norm": 7.116199840483028, "learning_rate": 9.166208661081084e-06, "loss": 17.713, "step": 11538 }, { "epoch": 0.21092364779644288, "grad_norm": 6.67231809526451, "learning_rate": 9.16604498637586e-06, "loss": 17.4988, "step": 11539 }, { "epoch": 0.21094192699288938, "grad_norm": 7.694803892212349, "learning_rate": 9.165881297069008e-06, "loss": 18.0834, "step": 11540 }, { "epoch": 0.21096020618933592, "grad_norm": 6.290457034681836, "learning_rate": 9.165717593161104e-06, "loss": 17.6581, "step": 11541 }, { "epoch": 0.21097848538578243, "grad_norm": 6.212094558893326, "learning_rate": 9.16555387465272e-06, "loss": 17.4364, "step": 11542 }, { "epoch": 0.21099676458222896, "grad_norm": 6.628723132600602, "learning_rate": 9.165390141544433e-06, "loss": 17.5085, "step": 11543 }, { "epoch": 0.2110150437786755, "grad_norm": 6.358862691567192, "learning_rate": 9.165226393836815e-06, "loss": 17.3477, "step": 11544 }, { "epoch": 0.211033322975122, "grad_norm": 6.740305082496149, "learning_rate": 9.16506263153044e-06, "loss": 17.9455, "step": 11545 }, { "epoch": 0.21105160217156854, "grad_norm": 6.26308925039984, "learning_rate": 9.164898854625882e-06, "loss": 17.5103, "step": 11546 }, { "epoch": 0.21106988136801505, "grad_norm": 7.128480082817722, "learning_rate": 9.164735063123714e-06, "loss": 17.646, "step": 11547 }, { "epoch": 0.21108816056446159, "grad_norm": 6.4775515543540605, "learning_rate": 9.16457125702451e-06, "loss": 17.6652, "step": 11548 }, { "epoch": 0.21110643976090812, "grad_norm": 6.380750658999174, "learning_rate": 9.164407436328845e-06, "loss": 17.6208, "step": 11549 }, { "epoch": 0.21112471895735463, "grad_norm": 6.888637801448464, "learning_rate": 9.164243601037297e-06, "loss": 17.5228, "step": 11550 }, { "epoch": 0.21114299815380116, "grad_norm": 5.799070743378431, "learning_rate": 9.164079751150434e-06, "loss": 17.1002, "step": 11551 }, { "epoch": 0.21116127735024767, "grad_norm": 6.427275669896442, "learning_rate": 9.163915886668832e-06, "loss": 17.6247, "step": 11552 }, { "epoch": 0.2111795565466942, "grad_norm": 6.910129880790046, "learning_rate": 9.163752007593068e-06, "loss": 17.7014, "step": 11553 }, { "epoch": 0.21119783574314074, "grad_norm": 6.334479730762166, "learning_rate": 9.163588113923714e-06, "loss": 17.4241, "step": 11554 }, { "epoch": 0.21121611493958725, "grad_norm": 10.11538820518104, "learning_rate": 9.163424205661345e-06, "loss": 18.7674, "step": 11555 }, { "epoch": 0.2112343941360338, "grad_norm": 5.632433495283178, "learning_rate": 9.163260282806533e-06, "loss": 17.0817, "step": 11556 }, { "epoch": 0.2112526733324803, "grad_norm": 7.162886238789814, "learning_rate": 9.163096345359859e-06, "loss": 17.8033, "step": 11557 }, { "epoch": 0.21127095252892683, "grad_norm": 6.708746524379571, "learning_rate": 9.162932393321891e-06, "loss": 17.5362, "step": 11558 }, { "epoch": 0.21128923172537334, "grad_norm": 7.855253920799346, "learning_rate": 9.162768426693207e-06, "loss": 18.2244, "step": 11559 }, { "epoch": 0.21130751092181987, "grad_norm": 7.777970909711207, "learning_rate": 9.162604445474382e-06, "loss": 18.2098, "step": 11560 }, { "epoch": 0.2113257901182664, "grad_norm": 5.156482272524668, "learning_rate": 9.162440449665988e-06, "loss": 16.9433, "step": 11561 }, { "epoch": 0.21134406931471292, "grad_norm": 6.151643993226001, "learning_rate": 9.162276439268601e-06, "loss": 17.3725, "step": 11562 }, { "epoch": 0.21136234851115945, "grad_norm": 6.370579279810969, "learning_rate": 9.162112414282797e-06, "loss": 17.4643, "step": 11563 }, { "epoch": 0.21138062770760596, "grad_norm": 8.356616226837273, "learning_rate": 9.16194837470915e-06, "loss": 18.4377, "step": 11564 }, { "epoch": 0.2113989069040525, "grad_norm": 6.467923689781751, "learning_rate": 9.161784320548237e-06, "loss": 17.3117, "step": 11565 }, { "epoch": 0.21141718610049903, "grad_norm": 9.517975928212337, "learning_rate": 9.161620251800629e-06, "loss": 18.5935, "step": 11566 }, { "epoch": 0.21143546529694554, "grad_norm": 6.024618051987733, "learning_rate": 9.161456168466905e-06, "loss": 17.2635, "step": 11567 }, { "epoch": 0.21145374449339208, "grad_norm": 6.240698126621197, "learning_rate": 9.161292070547636e-06, "loss": 17.4433, "step": 11568 }, { "epoch": 0.21147202368983858, "grad_norm": 6.112310765398839, "learning_rate": 9.1611279580434e-06, "loss": 17.5563, "step": 11569 }, { "epoch": 0.21149030288628512, "grad_norm": 6.5602463956907595, "learning_rate": 9.160963830954772e-06, "loss": 17.8943, "step": 11570 }, { "epoch": 0.21150858208273166, "grad_norm": 6.263790052624193, "learning_rate": 9.160799689282327e-06, "loss": 17.509, "step": 11571 }, { "epoch": 0.21152686127917816, "grad_norm": 7.064124643991461, "learning_rate": 9.16063553302664e-06, "loss": 17.9556, "step": 11572 }, { "epoch": 0.2115451404756247, "grad_norm": 7.44979062916576, "learning_rate": 9.160471362188285e-06, "loss": 17.534, "step": 11573 }, { "epoch": 0.2115634196720712, "grad_norm": 7.0510195637169595, "learning_rate": 9.160307176767841e-06, "loss": 17.726, "step": 11574 }, { "epoch": 0.21158169886851774, "grad_norm": 5.517175463698974, "learning_rate": 9.16014297676588e-06, "loss": 17.1502, "step": 11575 }, { "epoch": 0.21159997806496425, "grad_norm": 7.138670414777385, "learning_rate": 9.159978762182979e-06, "loss": 17.9271, "step": 11576 }, { "epoch": 0.21161825726141079, "grad_norm": 6.403940223475687, "learning_rate": 9.159814533019716e-06, "loss": 17.4957, "step": 11577 }, { "epoch": 0.21163653645785732, "grad_norm": 6.7509313342979205, "learning_rate": 9.15965028927666e-06, "loss": 17.6066, "step": 11578 }, { "epoch": 0.21165481565430383, "grad_norm": 6.171380155539927, "learning_rate": 9.159486030954395e-06, "loss": 17.4828, "step": 11579 }, { "epoch": 0.21167309485075037, "grad_norm": 6.342175557557343, "learning_rate": 9.15932175805349e-06, "loss": 17.5831, "step": 11580 }, { "epoch": 0.21169137404719687, "grad_norm": 6.400579261216163, "learning_rate": 9.159157470574522e-06, "loss": 17.6458, "step": 11581 }, { "epoch": 0.2117096532436434, "grad_norm": 6.927875169843063, "learning_rate": 9.15899316851807e-06, "loss": 17.5782, "step": 11582 }, { "epoch": 0.21172793244008994, "grad_norm": 6.849203963747864, "learning_rate": 9.158828851884707e-06, "loss": 17.6371, "step": 11583 }, { "epoch": 0.21174621163653645, "grad_norm": 7.280309477198249, "learning_rate": 9.158664520675009e-06, "loss": 17.5368, "step": 11584 }, { "epoch": 0.211764490832983, "grad_norm": 7.687530189637583, "learning_rate": 9.158500174889553e-06, "loss": 17.8132, "step": 11585 }, { "epoch": 0.2117827700294295, "grad_norm": 7.155417598881707, "learning_rate": 9.158335814528916e-06, "loss": 17.8905, "step": 11586 }, { "epoch": 0.21180104922587603, "grad_norm": 6.738417741555908, "learning_rate": 9.158171439593671e-06, "loss": 17.4718, "step": 11587 }, { "epoch": 0.21181932842232257, "grad_norm": 7.578109723057844, "learning_rate": 9.1580070500844e-06, "loss": 17.9955, "step": 11588 }, { "epoch": 0.21183760761876907, "grad_norm": 6.373369435944889, "learning_rate": 9.15784264600167e-06, "loss": 17.2122, "step": 11589 }, { "epoch": 0.2118558868152156, "grad_norm": 8.153928745446654, "learning_rate": 9.157678227346066e-06, "loss": 17.6803, "step": 11590 }, { "epoch": 0.21187416601166212, "grad_norm": 7.7531314636113215, "learning_rate": 9.157513794118158e-06, "loss": 17.7606, "step": 11591 }, { "epoch": 0.21189244520810865, "grad_norm": 8.286553170015672, "learning_rate": 9.157349346318525e-06, "loss": 17.923, "step": 11592 }, { "epoch": 0.21191072440455516, "grad_norm": 6.890711489229295, "learning_rate": 9.157184883947745e-06, "loss": 17.7812, "step": 11593 }, { "epoch": 0.2119290036010017, "grad_norm": 8.536433843597331, "learning_rate": 9.157020407006392e-06, "loss": 17.5298, "step": 11594 }, { "epoch": 0.21194728279744823, "grad_norm": 8.666063839961629, "learning_rate": 9.156855915495043e-06, "loss": 18.1918, "step": 11595 }, { "epoch": 0.21196556199389474, "grad_norm": 6.250026801355732, "learning_rate": 9.156691409414275e-06, "loss": 17.3656, "step": 11596 }, { "epoch": 0.21198384119034128, "grad_norm": 6.820324698686371, "learning_rate": 9.156526888764664e-06, "loss": 18.0642, "step": 11597 }, { "epoch": 0.21200212038678778, "grad_norm": 5.655656072320394, "learning_rate": 9.156362353546789e-06, "loss": 17.1208, "step": 11598 }, { "epoch": 0.21202039958323432, "grad_norm": 7.6187923804479265, "learning_rate": 9.156197803761223e-06, "loss": 18.0396, "step": 11599 }, { "epoch": 0.21203867877968086, "grad_norm": 5.836772120744281, "learning_rate": 9.156033239408544e-06, "loss": 17.3792, "step": 11600 }, { "epoch": 0.21205695797612736, "grad_norm": 6.5679101584275825, "learning_rate": 9.155868660489327e-06, "loss": 17.4726, "step": 11601 }, { "epoch": 0.2120752371725739, "grad_norm": 6.321499740706384, "learning_rate": 9.155704067004154e-06, "loss": 17.3891, "step": 11602 }, { "epoch": 0.2120935163690204, "grad_norm": 6.889131606502487, "learning_rate": 9.155539458953597e-06, "loss": 17.5789, "step": 11603 }, { "epoch": 0.21211179556546694, "grad_norm": 6.89687307770892, "learning_rate": 9.155374836338237e-06, "loss": 17.4977, "step": 11604 }, { "epoch": 0.21213007476191348, "grad_norm": 5.848217535319225, "learning_rate": 9.155210199158648e-06, "loss": 17.0581, "step": 11605 }, { "epoch": 0.21214835395836, "grad_norm": 6.740320780209365, "learning_rate": 9.155045547415408e-06, "loss": 17.4307, "step": 11606 }, { "epoch": 0.21216663315480652, "grad_norm": 8.038571701517087, "learning_rate": 9.154880881109093e-06, "loss": 18.2448, "step": 11607 }, { "epoch": 0.21218491235125303, "grad_norm": 6.480469147176548, "learning_rate": 9.15471620024028e-06, "loss": 17.4355, "step": 11608 }, { "epoch": 0.21220319154769957, "grad_norm": 6.531460315315049, "learning_rate": 9.15455150480955e-06, "loss": 17.3958, "step": 11609 }, { "epoch": 0.21222147074414607, "grad_norm": 7.304185423649102, "learning_rate": 9.154386794817476e-06, "loss": 18.0237, "step": 11610 }, { "epoch": 0.2122397499405926, "grad_norm": 6.429840559018353, "learning_rate": 9.154222070264637e-06, "loss": 17.3974, "step": 11611 }, { "epoch": 0.21225802913703914, "grad_norm": 5.9068517909284965, "learning_rate": 9.154057331151612e-06, "loss": 17.2643, "step": 11612 }, { "epoch": 0.21227630833348565, "grad_norm": 6.46069971775121, "learning_rate": 9.153892577478973e-06, "loss": 17.4117, "step": 11613 }, { "epoch": 0.2122945875299322, "grad_norm": 7.125258530227882, "learning_rate": 9.153727809247303e-06, "loss": 17.6316, "step": 11614 }, { "epoch": 0.2123128667263787, "grad_norm": 6.122252316473095, "learning_rate": 9.153563026457178e-06, "loss": 17.4086, "step": 11615 }, { "epoch": 0.21233114592282523, "grad_norm": 6.444666365891119, "learning_rate": 9.153398229109174e-06, "loss": 17.4249, "step": 11616 }, { "epoch": 0.21234942511927177, "grad_norm": 7.493875148419845, "learning_rate": 9.15323341720387e-06, "loss": 17.8383, "step": 11617 }, { "epoch": 0.21236770431571828, "grad_norm": 8.095743451259024, "learning_rate": 9.153068590741843e-06, "loss": 18.0685, "step": 11618 }, { "epoch": 0.2123859835121648, "grad_norm": 7.440249340564604, "learning_rate": 9.152903749723671e-06, "loss": 18.1068, "step": 11619 }, { "epoch": 0.21240426270861132, "grad_norm": 6.3605355322271295, "learning_rate": 9.152738894149931e-06, "loss": 17.5995, "step": 11620 }, { "epoch": 0.21242254190505785, "grad_norm": 5.294877973158245, "learning_rate": 9.152574024021202e-06, "loss": 17.0195, "step": 11621 }, { "epoch": 0.2124408211015044, "grad_norm": 7.75287908114004, "learning_rate": 9.152409139338062e-06, "loss": 18.4059, "step": 11622 }, { "epoch": 0.2124591002979509, "grad_norm": 5.93776794059065, "learning_rate": 9.152244240101088e-06, "loss": 17.2836, "step": 11623 }, { "epoch": 0.21247737949439743, "grad_norm": 8.964291260472157, "learning_rate": 9.152079326310858e-06, "loss": 18.1602, "step": 11624 }, { "epoch": 0.21249565869084394, "grad_norm": 5.577234267455074, "learning_rate": 9.151914397967952e-06, "loss": 17.0911, "step": 11625 }, { "epoch": 0.21251393788729048, "grad_norm": 5.771379750917453, "learning_rate": 9.151749455072945e-06, "loss": 17.3774, "step": 11626 }, { "epoch": 0.21253221708373699, "grad_norm": 6.673806956891338, "learning_rate": 9.151584497626415e-06, "loss": 17.5897, "step": 11627 }, { "epoch": 0.21255049628018352, "grad_norm": 6.5728951995585945, "learning_rate": 9.151419525628942e-06, "loss": 17.6813, "step": 11628 }, { "epoch": 0.21256877547663006, "grad_norm": 6.38576838913584, "learning_rate": 9.151254539081106e-06, "loss": 17.3931, "step": 11629 }, { "epoch": 0.21258705467307656, "grad_norm": 6.229414042846477, "learning_rate": 9.151089537983482e-06, "loss": 17.3624, "step": 11630 }, { "epoch": 0.2126053338695231, "grad_norm": 7.222747357571975, "learning_rate": 9.150924522336648e-06, "loss": 17.5589, "step": 11631 }, { "epoch": 0.2126236130659696, "grad_norm": 6.316618315287245, "learning_rate": 9.150759492141186e-06, "loss": 17.6173, "step": 11632 }, { "epoch": 0.21264189226241614, "grad_norm": 6.707411291395412, "learning_rate": 9.15059444739767e-06, "loss": 17.5739, "step": 11633 }, { "epoch": 0.21266017145886268, "grad_norm": 7.791898302407172, "learning_rate": 9.150429388106684e-06, "loss": 17.9749, "step": 11634 }, { "epoch": 0.2126784506553092, "grad_norm": 7.6586665707499195, "learning_rate": 9.1502643142688e-06, "loss": 17.9408, "step": 11635 }, { "epoch": 0.21269672985175572, "grad_norm": 7.394545431095758, "learning_rate": 9.1500992258846e-06, "loss": 17.6004, "step": 11636 }, { "epoch": 0.21271500904820223, "grad_norm": 7.765655019870423, "learning_rate": 9.149934122954662e-06, "loss": 18.0874, "step": 11637 }, { "epoch": 0.21273328824464877, "grad_norm": 5.704399797239602, "learning_rate": 9.149769005479568e-06, "loss": 16.9736, "step": 11638 }, { "epoch": 0.2127515674410953, "grad_norm": 7.2538294596919854, "learning_rate": 9.14960387345989e-06, "loss": 18.0825, "step": 11639 }, { "epoch": 0.2127698466375418, "grad_norm": 7.820846597900338, "learning_rate": 9.149438726896213e-06, "loss": 17.9034, "step": 11640 }, { "epoch": 0.21278812583398835, "grad_norm": 7.4306590408094, "learning_rate": 9.149273565789112e-06, "loss": 17.8273, "step": 11641 }, { "epoch": 0.21280640503043485, "grad_norm": 7.559820598413737, "learning_rate": 9.149108390139168e-06, "loss": 18.0741, "step": 11642 }, { "epoch": 0.2128246842268814, "grad_norm": 6.603188208185257, "learning_rate": 9.148943199946958e-06, "loss": 17.4214, "step": 11643 }, { "epoch": 0.2128429634233279, "grad_norm": 6.026696245161655, "learning_rate": 9.148777995213062e-06, "loss": 17.2833, "step": 11644 }, { "epoch": 0.21286124261977443, "grad_norm": 7.963908018718288, "learning_rate": 9.148612775938062e-06, "loss": 18.3751, "step": 11645 }, { "epoch": 0.21287952181622097, "grad_norm": 7.869206912743326, "learning_rate": 9.148447542122532e-06, "loss": 18.0016, "step": 11646 }, { "epoch": 0.21289780101266748, "grad_norm": 6.242017443935883, "learning_rate": 9.148282293767053e-06, "loss": 17.4454, "step": 11647 }, { "epoch": 0.212916080209114, "grad_norm": 7.435901395097871, "learning_rate": 9.148117030872206e-06, "loss": 17.8332, "step": 11648 }, { "epoch": 0.21293435940556052, "grad_norm": 7.250999550074894, "learning_rate": 9.147951753438568e-06, "loss": 17.8396, "step": 11649 }, { "epoch": 0.21295263860200705, "grad_norm": 6.9846991067583355, "learning_rate": 9.147786461466718e-06, "loss": 18.0227, "step": 11650 }, { "epoch": 0.2129709177984536, "grad_norm": 6.5260356530520305, "learning_rate": 9.147621154957238e-06, "loss": 17.4412, "step": 11651 }, { "epoch": 0.2129891969949001, "grad_norm": 7.501193901397891, "learning_rate": 9.147455833910706e-06, "loss": 18.01, "step": 11652 }, { "epoch": 0.21300747619134663, "grad_norm": 6.063653386051944, "learning_rate": 9.147290498327698e-06, "loss": 17.2201, "step": 11653 }, { "epoch": 0.21302575538779314, "grad_norm": 6.530678628238025, "learning_rate": 9.1471251482088e-06, "loss": 17.4399, "step": 11654 }, { "epoch": 0.21304403458423968, "grad_norm": 6.143164244941838, "learning_rate": 9.146959783554587e-06, "loss": 17.2389, "step": 11655 }, { "epoch": 0.2130623137806862, "grad_norm": 7.801729789804826, "learning_rate": 9.14679440436564e-06, "loss": 18.2345, "step": 11656 }, { "epoch": 0.21308059297713272, "grad_norm": 7.16214036325532, "learning_rate": 9.14662901064254e-06, "loss": 17.6856, "step": 11657 }, { "epoch": 0.21309887217357926, "grad_norm": 6.371340189688993, "learning_rate": 9.146463602385863e-06, "loss": 17.192, "step": 11658 }, { "epoch": 0.21311715137002576, "grad_norm": 8.98845559325511, "learning_rate": 9.146298179596191e-06, "loss": 18.3197, "step": 11659 }, { "epoch": 0.2131354305664723, "grad_norm": 7.3263238137853826, "learning_rate": 9.146132742274106e-06, "loss": 18.0394, "step": 11660 }, { "epoch": 0.2131537097629188, "grad_norm": 5.462073782100659, "learning_rate": 9.145967290420184e-06, "loss": 17.2091, "step": 11661 }, { "epoch": 0.21317198895936534, "grad_norm": 6.746639516360985, "learning_rate": 9.145801824035006e-06, "loss": 17.8386, "step": 11662 }, { "epoch": 0.21319026815581188, "grad_norm": 7.6689912130221645, "learning_rate": 9.145636343119152e-06, "loss": 17.9464, "step": 11663 }, { "epoch": 0.2132085473522584, "grad_norm": 6.695748876445942, "learning_rate": 9.145470847673203e-06, "loss": 17.5456, "step": 11664 }, { "epoch": 0.21322682654870492, "grad_norm": 8.198516003864741, "learning_rate": 9.145305337697737e-06, "loss": 17.909, "step": 11665 }, { "epoch": 0.21324510574515143, "grad_norm": 6.908031273691143, "learning_rate": 9.145139813193337e-06, "loss": 17.966, "step": 11666 }, { "epoch": 0.21326338494159797, "grad_norm": 7.055918827022239, "learning_rate": 9.144974274160582e-06, "loss": 17.9561, "step": 11667 }, { "epoch": 0.2132816641380445, "grad_norm": 6.398251192858152, "learning_rate": 9.144808720600052e-06, "loss": 17.5263, "step": 11668 }, { "epoch": 0.213299943334491, "grad_norm": 6.898360543071194, "learning_rate": 9.144643152512326e-06, "loss": 17.321, "step": 11669 }, { "epoch": 0.21331822253093755, "grad_norm": 6.859210135796859, "learning_rate": 9.144477569897984e-06, "loss": 17.7609, "step": 11670 }, { "epoch": 0.21333650172738405, "grad_norm": 6.722237461387456, "learning_rate": 9.14431197275761e-06, "loss": 17.55, "step": 11671 }, { "epoch": 0.2133547809238306, "grad_norm": 5.435865481530694, "learning_rate": 9.144146361091784e-06, "loss": 17.2483, "step": 11672 }, { "epoch": 0.21337306012027712, "grad_norm": 6.935190425853276, "learning_rate": 9.143980734901082e-06, "loss": 17.8095, "step": 11673 }, { "epoch": 0.21339133931672363, "grad_norm": 6.078212964955392, "learning_rate": 9.143815094186088e-06, "loss": 17.3058, "step": 11674 }, { "epoch": 0.21340961851317017, "grad_norm": 6.8529284508095865, "learning_rate": 9.14364943894738e-06, "loss": 17.7405, "step": 11675 }, { "epoch": 0.21342789770961668, "grad_norm": 7.243324531782682, "learning_rate": 9.143483769185542e-06, "loss": 18.0304, "step": 11676 }, { "epoch": 0.2134461769060632, "grad_norm": 7.7500115768982765, "learning_rate": 9.143318084901152e-06, "loss": 17.9287, "step": 11677 }, { "epoch": 0.21346445610250972, "grad_norm": 6.009762398044453, "learning_rate": 9.143152386094791e-06, "loss": 17.3918, "step": 11678 }, { "epoch": 0.21348273529895626, "grad_norm": 7.524750726459312, "learning_rate": 9.142986672767042e-06, "loss": 17.9176, "step": 11679 }, { "epoch": 0.2135010144954028, "grad_norm": 6.387471572678087, "learning_rate": 9.142820944918485e-06, "loss": 17.4112, "step": 11680 }, { "epoch": 0.2135192936918493, "grad_norm": 7.499335328259966, "learning_rate": 9.142655202549698e-06, "loss": 17.9846, "step": 11681 }, { "epoch": 0.21353757288829583, "grad_norm": 9.125716072213985, "learning_rate": 9.142489445661265e-06, "loss": 17.9601, "step": 11682 }, { "epoch": 0.21355585208474234, "grad_norm": 8.12236431625984, "learning_rate": 9.142323674253766e-06, "loss": 18.2265, "step": 11683 }, { "epoch": 0.21357413128118888, "grad_norm": 7.314767327594051, "learning_rate": 9.142157888327781e-06, "loss": 17.8159, "step": 11684 }, { "epoch": 0.2135924104776354, "grad_norm": 6.091776107112946, "learning_rate": 9.141992087883893e-06, "loss": 17.414, "step": 11685 }, { "epoch": 0.21361068967408192, "grad_norm": 8.327713269654664, "learning_rate": 9.141826272922683e-06, "loss": 17.9633, "step": 11686 }, { "epoch": 0.21362896887052846, "grad_norm": 7.269109843841839, "learning_rate": 9.14166044344473e-06, "loss": 17.8597, "step": 11687 }, { "epoch": 0.21364724806697497, "grad_norm": 4.9721885361299805, "learning_rate": 9.141494599450615e-06, "loss": 16.8696, "step": 11688 }, { "epoch": 0.2136655272634215, "grad_norm": 7.090417751395468, "learning_rate": 9.141328740940922e-06, "loss": 17.8949, "step": 11689 }, { "epoch": 0.21368380645986804, "grad_norm": 7.035078818145415, "learning_rate": 9.141162867916234e-06, "loss": 17.4371, "step": 11690 }, { "epoch": 0.21370208565631454, "grad_norm": 16.43434679237421, "learning_rate": 9.140996980377126e-06, "loss": 18.5113, "step": 11691 }, { "epoch": 0.21372036485276108, "grad_norm": 6.384761996289143, "learning_rate": 9.140831078324183e-06, "loss": 17.2118, "step": 11692 }, { "epoch": 0.2137386440492076, "grad_norm": 4.832332939261827, "learning_rate": 9.140665161757988e-06, "loss": 16.8148, "step": 11693 }, { "epoch": 0.21375692324565412, "grad_norm": 5.436442815946285, "learning_rate": 9.14049923067912e-06, "loss": 17.0378, "step": 11694 }, { "epoch": 0.21377520244210063, "grad_norm": 6.427883132973813, "learning_rate": 9.140333285088162e-06, "loss": 17.2919, "step": 11695 }, { "epoch": 0.21379348163854717, "grad_norm": 6.633390866294447, "learning_rate": 9.140167324985695e-06, "loss": 17.6946, "step": 11696 }, { "epoch": 0.2138117608349937, "grad_norm": 6.974052542862472, "learning_rate": 9.140001350372302e-06, "loss": 17.7674, "step": 11697 }, { "epoch": 0.2138300400314402, "grad_norm": 7.825334790348214, "learning_rate": 9.139835361248563e-06, "loss": 18.2005, "step": 11698 }, { "epoch": 0.21384831922788675, "grad_norm": 6.478210075398582, "learning_rate": 9.139669357615059e-06, "loss": 17.6169, "step": 11699 }, { "epoch": 0.21386659842433325, "grad_norm": 5.855948759772832, "learning_rate": 9.139503339472375e-06, "loss": 17.3109, "step": 11700 }, { "epoch": 0.2138848776207798, "grad_norm": 7.681421099782268, "learning_rate": 9.139337306821089e-06, "loss": 18.3091, "step": 11701 }, { "epoch": 0.21390315681722633, "grad_norm": 7.687429016987093, "learning_rate": 9.139171259661785e-06, "loss": 18.2078, "step": 11702 }, { "epoch": 0.21392143601367283, "grad_norm": 7.724126260515264, "learning_rate": 9.139005197995046e-06, "loss": 18.0587, "step": 11703 }, { "epoch": 0.21393971521011937, "grad_norm": 7.107129911998939, "learning_rate": 9.138839121821454e-06, "loss": 17.5602, "step": 11704 }, { "epoch": 0.21395799440656588, "grad_norm": 7.221122836910635, "learning_rate": 9.138673031141587e-06, "loss": 17.8326, "step": 11705 }, { "epoch": 0.2139762736030124, "grad_norm": 7.428135615526704, "learning_rate": 9.138506925956032e-06, "loss": 17.6103, "step": 11706 }, { "epoch": 0.21399455279945895, "grad_norm": 6.715112095164756, "learning_rate": 9.13834080626537e-06, "loss": 17.5912, "step": 11707 }, { "epoch": 0.21401283199590546, "grad_norm": 6.911483038049616, "learning_rate": 9.138174672070181e-06, "loss": 17.9271, "step": 11708 }, { "epoch": 0.214031111192352, "grad_norm": 7.199424686812347, "learning_rate": 9.13800852337105e-06, "loss": 17.4627, "step": 11709 }, { "epoch": 0.2140493903887985, "grad_norm": 6.34042024283748, "learning_rate": 9.137842360168559e-06, "loss": 17.6055, "step": 11710 }, { "epoch": 0.21406766958524504, "grad_norm": 7.195130962687598, "learning_rate": 9.137676182463287e-06, "loss": 18.0304, "step": 11711 }, { "epoch": 0.21408594878169154, "grad_norm": 7.174069647519072, "learning_rate": 9.13750999025582e-06, "loss": 18.0179, "step": 11712 }, { "epoch": 0.21410422797813808, "grad_norm": 6.9921154121447895, "learning_rate": 9.137343783546741e-06, "loss": 17.8961, "step": 11713 }, { "epoch": 0.21412250717458461, "grad_norm": 6.625369188687903, "learning_rate": 9.13717756233663e-06, "loss": 17.7955, "step": 11714 }, { "epoch": 0.21414078637103112, "grad_norm": 6.851585685886345, "learning_rate": 9.137011326626071e-06, "loss": 17.5806, "step": 11715 }, { "epoch": 0.21415906556747766, "grad_norm": 5.764128584589972, "learning_rate": 9.136845076415645e-06, "loss": 17.2906, "step": 11716 }, { "epoch": 0.21417734476392417, "grad_norm": 6.615149485649565, "learning_rate": 9.136678811705937e-06, "loss": 17.7314, "step": 11717 }, { "epoch": 0.2141956239603707, "grad_norm": 7.0609122220256575, "learning_rate": 9.13651253249753e-06, "loss": 17.646, "step": 11718 }, { "epoch": 0.21421390315681724, "grad_norm": 6.169439045457185, "learning_rate": 9.136346238791004e-06, "loss": 17.2506, "step": 11719 }, { "epoch": 0.21423218235326374, "grad_norm": 6.770321955388486, "learning_rate": 9.136179930586944e-06, "loss": 17.7473, "step": 11720 }, { "epoch": 0.21425046154971028, "grad_norm": 6.334847259047038, "learning_rate": 9.136013607885931e-06, "loss": 17.3342, "step": 11721 }, { "epoch": 0.2142687407461568, "grad_norm": 5.40271471549218, "learning_rate": 9.135847270688548e-06, "loss": 17.1949, "step": 11722 }, { "epoch": 0.21428701994260332, "grad_norm": 5.540679582173256, "learning_rate": 9.135680918995382e-06, "loss": 17.0815, "step": 11723 }, { "epoch": 0.21430529913904986, "grad_norm": 6.869525382603854, "learning_rate": 9.135514552807014e-06, "loss": 17.702, "step": 11724 }, { "epoch": 0.21432357833549637, "grad_norm": 7.410156317625125, "learning_rate": 9.135348172124024e-06, "loss": 17.9923, "step": 11725 }, { "epoch": 0.2143418575319429, "grad_norm": 7.271365725918522, "learning_rate": 9.135181776946998e-06, "loss": 17.9684, "step": 11726 }, { "epoch": 0.2143601367283894, "grad_norm": 6.263625276598185, "learning_rate": 9.135015367276519e-06, "loss": 17.4419, "step": 11727 }, { "epoch": 0.21437841592483595, "grad_norm": 8.377459866777203, "learning_rate": 9.13484894311317e-06, "loss": 18.2825, "step": 11728 }, { "epoch": 0.21439669512128248, "grad_norm": 7.152149590163419, "learning_rate": 9.134682504457534e-06, "loss": 17.6428, "step": 11729 }, { "epoch": 0.214414974317729, "grad_norm": 6.015003874468464, "learning_rate": 9.134516051310196e-06, "loss": 17.3465, "step": 11730 }, { "epoch": 0.21443325351417553, "grad_norm": 6.631122011287288, "learning_rate": 9.134349583671738e-06, "loss": 17.4083, "step": 11731 }, { "epoch": 0.21445153271062203, "grad_norm": 7.006134388504954, "learning_rate": 9.134183101542742e-06, "loss": 18.0137, "step": 11732 }, { "epoch": 0.21446981190706857, "grad_norm": 7.5871653350385735, "learning_rate": 9.134016604923792e-06, "loss": 17.796, "step": 11733 }, { "epoch": 0.21448809110351508, "grad_norm": 5.97955367235346, "learning_rate": 9.133850093815474e-06, "loss": 17.3184, "step": 11734 }, { "epoch": 0.2145063702999616, "grad_norm": 8.284393843268226, "learning_rate": 9.13368356821837e-06, "loss": 18.2927, "step": 11735 }, { "epoch": 0.21452464949640815, "grad_norm": 8.14329714951248, "learning_rate": 9.133517028133066e-06, "loss": 17.8919, "step": 11736 }, { "epoch": 0.21454292869285466, "grad_norm": 6.615274421398922, "learning_rate": 9.13335047356014e-06, "loss": 17.3906, "step": 11737 }, { "epoch": 0.2145612078893012, "grad_norm": 6.764795377485563, "learning_rate": 9.13318390450018e-06, "loss": 17.5324, "step": 11738 }, { "epoch": 0.2145794870857477, "grad_norm": 6.6683910449413455, "learning_rate": 9.133017320953769e-06, "loss": 17.6568, "step": 11739 }, { "epoch": 0.21459776628219424, "grad_norm": 5.7621555547544006, "learning_rate": 9.132850722921494e-06, "loss": 17.3257, "step": 11740 }, { "epoch": 0.21461604547864077, "grad_norm": 6.748899472649107, "learning_rate": 9.132684110403934e-06, "loss": 17.5096, "step": 11741 }, { "epoch": 0.21463432467508728, "grad_norm": 6.898553539535939, "learning_rate": 9.132517483401673e-06, "loss": 17.6772, "step": 11742 }, { "epoch": 0.21465260387153381, "grad_norm": 7.127063150402214, "learning_rate": 9.132350841915299e-06, "loss": 18.1117, "step": 11743 }, { "epoch": 0.21467088306798032, "grad_norm": 6.748241952896581, "learning_rate": 9.132184185945392e-06, "loss": 17.6061, "step": 11744 }, { "epoch": 0.21468916226442686, "grad_norm": 6.273668380566096, "learning_rate": 9.132017515492539e-06, "loss": 17.5028, "step": 11745 }, { "epoch": 0.2147074414608734, "grad_norm": 5.616942758696618, "learning_rate": 9.131850830557323e-06, "loss": 16.9859, "step": 11746 }, { "epoch": 0.2147257206573199, "grad_norm": 6.6702528076925605, "learning_rate": 9.131684131140328e-06, "loss": 17.7597, "step": 11747 }, { "epoch": 0.21474399985376644, "grad_norm": 6.4314363441149895, "learning_rate": 9.13151741724214e-06, "loss": 17.4066, "step": 11748 }, { "epoch": 0.21476227905021295, "grad_norm": 6.254027235472205, "learning_rate": 9.131350688863341e-06, "loss": 17.4192, "step": 11749 }, { "epoch": 0.21478055824665948, "grad_norm": 6.399946667461864, "learning_rate": 9.131183946004515e-06, "loss": 17.7945, "step": 11750 }, { "epoch": 0.214798837443106, "grad_norm": 12.592891370944914, "learning_rate": 9.131017188666251e-06, "loss": 18.6017, "step": 11751 }, { "epoch": 0.21481711663955252, "grad_norm": 7.892271113563236, "learning_rate": 9.13085041684913e-06, "loss": 17.752, "step": 11752 }, { "epoch": 0.21483539583599906, "grad_norm": 6.360236938234864, "learning_rate": 9.130683630553734e-06, "loss": 17.7222, "step": 11753 }, { "epoch": 0.21485367503244557, "grad_norm": 6.957877398085484, "learning_rate": 9.130516829780652e-06, "loss": 17.6721, "step": 11754 }, { "epoch": 0.2148719542288921, "grad_norm": 5.36767478304127, "learning_rate": 9.130350014530465e-06, "loss": 17.0818, "step": 11755 }, { "epoch": 0.2148902334253386, "grad_norm": 6.7866213840917045, "learning_rate": 9.13018318480376e-06, "loss": 17.5193, "step": 11756 }, { "epoch": 0.21490851262178515, "grad_norm": 7.310114998337191, "learning_rate": 9.130016340601124e-06, "loss": 18.1721, "step": 11757 }, { "epoch": 0.21492679181823168, "grad_norm": 7.766754764704571, "learning_rate": 9.129849481923137e-06, "loss": 17.9885, "step": 11758 }, { "epoch": 0.2149450710146782, "grad_norm": 7.358159678313833, "learning_rate": 9.129682608770388e-06, "loss": 17.8406, "step": 11759 }, { "epoch": 0.21496335021112473, "grad_norm": 6.34846838623486, "learning_rate": 9.129515721143459e-06, "loss": 17.1203, "step": 11760 }, { "epoch": 0.21498162940757123, "grad_norm": 6.503362003029298, "learning_rate": 9.129348819042934e-06, "loss": 17.3981, "step": 11761 }, { "epoch": 0.21499990860401777, "grad_norm": 7.710973776237608, "learning_rate": 9.1291819024694e-06, "loss": 18.0849, "step": 11762 }, { "epoch": 0.2150181878004643, "grad_norm": 7.169148622837023, "learning_rate": 9.129014971423442e-06, "loss": 17.79, "step": 11763 }, { "epoch": 0.2150364669969108, "grad_norm": 6.419588401375401, "learning_rate": 9.128848025905645e-06, "loss": 17.4207, "step": 11764 }, { "epoch": 0.21505474619335735, "grad_norm": 6.879463926786173, "learning_rate": 9.128681065916596e-06, "loss": 17.5164, "step": 11765 }, { "epoch": 0.21507302538980386, "grad_norm": 6.559677967491284, "learning_rate": 9.128514091456876e-06, "loss": 17.3865, "step": 11766 }, { "epoch": 0.2150913045862504, "grad_norm": 6.290869658016945, "learning_rate": 9.128347102527072e-06, "loss": 17.1739, "step": 11767 }, { "epoch": 0.2151095837826969, "grad_norm": 7.5171335927678635, "learning_rate": 9.128180099127772e-06, "loss": 17.824, "step": 11768 }, { "epoch": 0.21512786297914344, "grad_norm": 6.275706202548021, "learning_rate": 9.128013081259557e-06, "loss": 17.4745, "step": 11769 }, { "epoch": 0.21514614217558997, "grad_norm": 7.312128322408952, "learning_rate": 9.127846048923015e-06, "loss": 17.5866, "step": 11770 }, { "epoch": 0.21516442137203648, "grad_norm": 6.570097448543907, "learning_rate": 9.127679002118731e-06, "loss": 17.8862, "step": 11771 }, { "epoch": 0.21518270056848302, "grad_norm": 6.802289447448312, "learning_rate": 9.12751194084729e-06, "loss": 17.5273, "step": 11772 }, { "epoch": 0.21520097976492952, "grad_norm": 7.7264833228498775, "learning_rate": 9.127344865109276e-06, "loss": 17.8344, "step": 11773 }, { "epoch": 0.21521925896137606, "grad_norm": 6.054914653447179, "learning_rate": 9.12717777490528e-06, "loss": 17.6735, "step": 11774 }, { "epoch": 0.2152375381578226, "grad_norm": 6.365035929911253, "learning_rate": 9.12701067023588e-06, "loss": 17.7055, "step": 11775 }, { "epoch": 0.2152558173542691, "grad_norm": 5.932996851989658, "learning_rate": 9.12684355110167e-06, "loss": 17.1325, "step": 11776 }, { "epoch": 0.21527409655071564, "grad_norm": 6.515564613167469, "learning_rate": 9.126676417503229e-06, "loss": 17.8026, "step": 11777 }, { "epoch": 0.21529237574716215, "grad_norm": 7.039026329151559, "learning_rate": 9.126509269441144e-06, "loss": 17.803, "step": 11778 }, { "epoch": 0.21531065494360868, "grad_norm": 7.11009534613514, "learning_rate": 9.126342106916005e-06, "loss": 17.6082, "step": 11779 }, { "epoch": 0.21532893414005522, "grad_norm": 6.431209903045549, "learning_rate": 9.126174929928394e-06, "loss": 17.5655, "step": 11780 }, { "epoch": 0.21534721333650172, "grad_norm": 6.794067942928916, "learning_rate": 9.126007738478897e-06, "loss": 17.7129, "step": 11781 }, { "epoch": 0.21536549253294826, "grad_norm": 6.129034430365924, "learning_rate": 9.1258405325681e-06, "loss": 17.2967, "step": 11782 }, { "epoch": 0.21538377172939477, "grad_norm": 6.228539361149761, "learning_rate": 9.125673312196592e-06, "loss": 17.4575, "step": 11783 }, { "epoch": 0.2154020509258413, "grad_norm": 6.686981454888693, "learning_rate": 9.125506077364958e-06, "loss": 17.8332, "step": 11784 }, { "epoch": 0.2154203301222878, "grad_norm": 5.786652186785163, "learning_rate": 9.125338828073781e-06, "loss": 17.1764, "step": 11785 }, { "epoch": 0.21543860931873435, "grad_norm": 8.721482698306923, "learning_rate": 9.125171564323649e-06, "loss": 18.1051, "step": 11786 }, { "epoch": 0.21545688851518088, "grad_norm": 5.932436499512886, "learning_rate": 9.12500428611515e-06, "loss": 17.0535, "step": 11787 }, { "epoch": 0.2154751677116274, "grad_norm": 6.780036753127285, "learning_rate": 9.124836993448868e-06, "loss": 17.8021, "step": 11788 }, { "epoch": 0.21549344690807393, "grad_norm": 6.002060187567544, "learning_rate": 9.12466968632539e-06, "loss": 17.2499, "step": 11789 }, { "epoch": 0.21551172610452043, "grad_norm": 6.589613600248243, "learning_rate": 9.124502364745305e-06, "loss": 17.5803, "step": 11790 }, { "epoch": 0.21553000530096697, "grad_norm": 5.867571287420147, "learning_rate": 9.124335028709197e-06, "loss": 17.2653, "step": 11791 }, { "epoch": 0.2155482844974135, "grad_norm": 7.073145531524499, "learning_rate": 9.12416767821765e-06, "loss": 18.1059, "step": 11792 }, { "epoch": 0.21556656369386, "grad_norm": 6.988284935897942, "learning_rate": 9.124000313271256e-06, "loss": 17.9629, "step": 11793 }, { "epoch": 0.21558484289030655, "grad_norm": 5.539287280895075, "learning_rate": 9.123832933870597e-06, "loss": 17.3149, "step": 11794 }, { "epoch": 0.21560312208675306, "grad_norm": 6.4895026181279984, "learning_rate": 9.123665540016262e-06, "loss": 17.5314, "step": 11795 }, { "epoch": 0.2156214012831996, "grad_norm": 7.069783717638051, "learning_rate": 9.123498131708837e-06, "loss": 17.6707, "step": 11796 }, { "epoch": 0.21563968047964613, "grad_norm": 6.540982541515931, "learning_rate": 9.123330708948908e-06, "loss": 17.6134, "step": 11797 }, { "epoch": 0.21565795967609264, "grad_norm": 7.211356462757298, "learning_rate": 9.123163271737063e-06, "loss": 17.7928, "step": 11798 }, { "epoch": 0.21567623887253917, "grad_norm": 5.38931003982935, "learning_rate": 9.12299582007389e-06, "loss": 17.2277, "step": 11799 }, { "epoch": 0.21569451806898568, "grad_norm": 6.84494750179637, "learning_rate": 9.122828353959971e-06, "loss": 17.9003, "step": 11800 }, { "epoch": 0.21571279726543222, "grad_norm": 6.745374541778459, "learning_rate": 9.1226608733959e-06, "loss": 17.5247, "step": 11801 }, { "epoch": 0.21573107646187872, "grad_norm": 5.554898939967108, "learning_rate": 9.122493378382259e-06, "loss": 17.0669, "step": 11802 }, { "epoch": 0.21574935565832526, "grad_norm": 6.301156541169049, "learning_rate": 9.122325868919637e-06, "loss": 17.3965, "step": 11803 }, { "epoch": 0.2157676348547718, "grad_norm": 6.630125518593899, "learning_rate": 9.12215834500862e-06, "loss": 17.8173, "step": 11804 }, { "epoch": 0.2157859140512183, "grad_norm": 6.10525941766438, "learning_rate": 9.121990806649795e-06, "loss": 17.5165, "step": 11805 }, { "epoch": 0.21580419324766484, "grad_norm": 6.8259056305339945, "learning_rate": 9.12182325384375e-06, "loss": 17.8236, "step": 11806 }, { "epoch": 0.21582247244411135, "grad_norm": 5.807572762701553, "learning_rate": 9.121655686591073e-06, "loss": 17.262, "step": 11807 }, { "epoch": 0.21584075164055788, "grad_norm": 5.953191464279736, "learning_rate": 9.121488104892352e-06, "loss": 17.3984, "step": 11808 }, { "epoch": 0.21585903083700442, "grad_norm": 5.750383837581517, "learning_rate": 9.121320508748171e-06, "loss": 17.3132, "step": 11809 }, { "epoch": 0.21587731003345093, "grad_norm": 6.541225734110672, "learning_rate": 9.121152898159118e-06, "loss": 17.7538, "step": 11810 }, { "epoch": 0.21589558922989746, "grad_norm": 8.125911642449207, "learning_rate": 9.120985273125784e-06, "loss": 18.2132, "step": 11811 }, { "epoch": 0.21591386842634397, "grad_norm": 7.2094951004451, "learning_rate": 9.120817633648753e-06, "loss": 17.8451, "step": 11812 }, { "epoch": 0.2159321476227905, "grad_norm": 5.94342052308106, "learning_rate": 9.120649979728615e-06, "loss": 17.1825, "step": 11813 }, { "epoch": 0.21595042681923704, "grad_norm": 5.519430438837936, "learning_rate": 9.120482311365955e-06, "loss": 17.0082, "step": 11814 }, { "epoch": 0.21596870601568355, "grad_norm": 7.840471022481249, "learning_rate": 9.120314628561362e-06, "loss": 18.1265, "step": 11815 }, { "epoch": 0.21598698521213008, "grad_norm": 6.227794689099335, "learning_rate": 9.120146931315424e-06, "loss": 17.2198, "step": 11816 }, { "epoch": 0.2160052644085766, "grad_norm": 6.449503747615225, "learning_rate": 9.11997921962873e-06, "loss": 17.7227, "step": 11817 }, { "epoch": 0.21602354360502313, "grad_norm": 6.598491712152966, "learning_rate": 9.119811493501865e-06, "loss": 17.4789, "step": 11818 }, { "epoch": 0.21604182280146964, "grad_norm": 7.658603780721176, "learning_rate": 9.11964375293542e-06, "loss": 17.9342, "step": 11819 }, { "epoch": 0.21606010199791617, "grad_norm": 6.813154337910873, "learning_rate": 9.11947599792998e-06, "loss": 17.698, "step": 11820 }, { "epoch": 0.2160783811943627, "grad_norm": 7.735146543824995, "learning_rate": 9.11930822848613e-06, "loss": 17.4835, "step": 11821 }, { "epoch": 0.21609666039080921, "grad_norm": 6.586523536038008, "learning_rate": 9.119140444604467e-06, "loss": 17.6442, "step": 11822 }, { "epoch": 0.21611493958725575, "grad_norm": 5.802005255457315, "learning_rate": 9.118972646285573e-06, "loss": 17.0101, "step": 11823 }, { "epoch": 0.21613321878370226, "grad_norm": 7.947883271402378, "learning_rate": 9.118804833530037e-06, "loss": 18.2263, "step": 11824 }, { "epoch": 0.2161514979801488, "grad_norm": 6.600167212655567, "learning_rate": 9.118637006338448e-06, "loss": 17.4165, "step": 11825 }, { "epoch": 0.21616977717659533, "grad_norm": 8.321030702889383, "learning_rate": 9.118469164711394e-06, "loss": 18.3109, "step": 11826 }, { "epoch": 0.21618805637304184, "grad_norm": 6.815631949701846, "learning_rate": 9.118301308649461e-06, "loss": 17.6911, "step": 11827 }, { "epoch": 0.21620633556948837, "grad_norm": 7.600373915365126, "learning_rate": 9.118133438153242e-06, "loss": 17.961, "step": 11828 }, { "epoch": 0.21622461476593488, "grad_norm": 7.772198325610063, "learning_rate": 9.11796555322332e-06, "loss": 18.0769, "step": 11829 }, { "epoch": 0.21624289396238142, "grad_norm": 6.9767876318635675, "learning_rate": 9.117797653860288e-06, "loss": 17.5627, "step": 11830 }, { "epoch": 0.21626117315882795, "grad_norm": 6.443769562013518, "learning_rate": 9.117629740064732e-06, "loss": 17.4205, "step": 11831 }, { "epoch": 0.21627945235527446, "grad_norm": 7.680971999628747, "learning_rate": 9.117461811837241e-06, "loss": 17.9219, "step": 11832 }, { "epoch": 0.216297731551721, "grad_norm": 6.172812442547839, "learning_rate": 9.117293869178404e-06, "loss": 17.427, "step": 11833 }, { "epoch": 0.2163160107481675, "grad_norm": 6.9445119510917666, "learning_rate": 9.11712591208881e-06, "loss": 17.9369, "step": 11834 }, { "epoch": 0.21633428994461404, "grad_norm": 6.02796978592889, "learning_rate": 9.116957940569044e-06, "loss": 17.5246, "step": 11835 }, { "epoch": 0.21635256914106055, "grad_norm": 5.972191777563683, "learning_rate": 9.1167899546197e-06, "loss": 17.317, "step": 11836 }, { "epoch": 0.21637084833750708, "grad_norm": 6.9378546496847235, "learning_rate": 9.116621954241364e-06, "loss": 17.3589, "step": 11837 }, { "epoch": 0.21638912753395362, "grad_norm": 6.452519156452831, "learning_rate": 9.116453939434626e-06, "loss": 17.6529, "step": 11838 }, { "epoch": 0.21640740673040013, "grad_norm": 5.643146956750747, "learning_rate": 9.116285910200074e-06, "loss": 17.2308, "step": 11839 }, { "epoch": 0.21642568592684666, "grad_norm": 8.741670301991528, "learning_rate": 9.116117866538297e-06, "loss": 18.1486, "step": 11840 }, { "epoch": 0.21644396512329317, "grad_norm": 7.543726740294994, "learning_rate": 9.115949808449883e-06, "loss": 18.0334, "step": 11841 }, { "epoch": 0.2164622443197397, "grad_norm": 8.26587446562942, "learning_rate": 9.115781735935423e-06, "loss": 18.1301, "step": 11842 }, { "epoch": 0.21648052351618624, "grad_norm": 7.667334811607829, "learning_rate": 9.115613648995504e-06, "loss": 17.8988, "step": 11843 }, { "epoch": 0.21649880271263275, "grad_norm": 7.209493264717548, "learning_rate": 9.115445547630716e-06, "loss": 17.7141, "step": 11844 }, { "epoch": 0.21651708190907928, "grad_norm": 6.4966574132294745, "learning_rate": 9.115277431841652e-06, "loss": 17.5758, "step": 11845 }, { "epoch": 0.2165353611055258, "grad_norm": 7.053509179934936, "learning_rate": 9.115109301628893e-06, "loss": 18.1974, "step": 11846 }, { "epoch": 0.21655364030197233, "grad_norm": 6.753767758501346, "learning_rate": 9.114941156993036e-06, "loss": 17.7995, "step": 11847 }, { "epoch": 0.21657191949841886, "grad_norm": 8.437271655610894, "learning_rate": 9.114772997934667e-06, "loss": 18.6091, "step": 11848 }, { "epoch": 0.21659019869486537, "grad_norm": 7.405182553876967, "learning_rate": 9.114604824454376e-06, "loss": 18.0246, "step": 11849 }, { "epoch": 0.2166084778913119, "grad_norm": 6.925499954862183, "learning_rate": 9.11443663655275e-06, "loss": 17.683, "step": 11850 }, { "epoch": 0.21662675708775841, "grad_norm": 7.02375638010205, "learning_rate": 9.114268434230383e-06, "loss": 17.6752, "step": 11851 }, { "epoch": 0.21664503628420495, "grad_norm": 8.319262822755558, "learning_rate": 9.11410021748786e-06, "loss": 18.2466, "step": 11852 }, { "epoch": 0.21666331548065146, "grad_norm": 5.675523731439726, "learning_rate": 9.113931986325775e-06, "loss": 17.0465, "step": 11853 }, { "epoch": 0.216681594677098, "grad_norm": 6.491090021517005, "learning_rate": 9.113763740744715e-06, "loss": 17.6491, "step": 11854 }, { "epoch": 0.21669987387354453, "grad_norm": 8.177124437509113, "learning_rate": 9.11359548074527e-06, "loss": 18.0958, "step": 11855 }, { "epoch": 0.21671815306999104, "grad_norm": 5.667189848145001, "learning_rate": 9.113427206328028e-06, "loss": 17.1354, "step": 11856 }, { "epoch": 0.21673643226643757, "grad_norm": 7.862395710919348, "learning_rate": 9.113258917493581e-06, "loss": 17.8076, "step": 11857 }, { "epoch": 0.21675471146288408, "grad_norm": 5.5223788730459775, "learning_rate": 9.11309061424252e-06, "loss": 17.1542, "step": 11858 }, { "epoch": 0.21677299065933062, "grad_norm": 7.686488475290111, "learning_rate": 9.112922296575433e-06, "loss": 18.2554, "step": 11859 }, { "epoch": 0.21679126985577715, "grad_norm": 5.599340225252916, "learning_rate": 9.11275396449291e-06, "loss": 17.1083, "step": 11860 }, { "epoch": 0.21680954905222366, "grad_norm": 7.6510061032633, "learning_rate": 9.11258561799554e-06, "loss": 18.2384, "step": 11861 }, { "epoch": 0.2168278282486702, "grad_norm": 6.827662276542155, "learning_rate": 9.112417257083916e-06, "loss": 17.6346, "step": 11862 }, { "epoch": 0.2168461074451167, "grad_norm": 6.4249535929699695, "learning_rate": 9.112248881758625e-06, "loss": 17.3116, "step": 11863 }, { "epoch": 0.21686438664156324, "grad_norm": 6.345876603272063, "learning_rate": 9.11208049202026e-06, "loss": 17.6555, "step": 11864 }, { "epoch": 0.21688266583800977, "grad_norm": 6.5773462015567565, "learning_rate": 9.111912087869408e-06, "loss": 17.8457, "step": 11865 }, { "epoch": 0.21690094503445628, "grad_norm": 7.1769804192445, "learning_rate": 9.111743669306663e-06, "loss": 17.7754, "step": 11866 }, { "epoch": 0.21691922423090282, "grad_norm": 6.6183778970578455, "learning_rate": 9.111575236332613e-06, "loss": 17.5283, "step": 11867 }, { "epoch": 0.21693750342734933, "grad_norm": 7.250872914207179, "learning_rate": 9.111406788947848e-06, "loss": 18.2171, "step": 11868 }, { "epoch": 0.21695578262379586, "grad_norm": 7.022477133112317, "learning_rate": 9.111238327152958e-06, "loss": 17.7527, "step": 11869 }, { "epoch": 0.21697406182024237, "grad_norm": 7.619132096028822, "learning_rate": 9.111069850948535e-06, "loss": 18.0703, "step": 11870 }, { "epoch": 0.2169923410166889, "grad_norm": 6.948474573255075, "learning_rate": 9.110901360335169e-06, "loss": 17.984, "step": 11871 }, { "epoch": 0.21701062021313544, "grad_norm": 6.442358726349963, "learning_rate": 9.11073285531345e-06, "loss": 17.4928, "step": 11872 }, { "epoch": 0.21702889940958195, "grad_norm": 7.101785052699681, "learning_rate": 9.110564335883968e-06, "loss": 17.9451, "step": 11873 }, { "epoch": 0.21704717860602848, "grad_norm": 6.56195274322934, "learning_rate": 9.110395802047319e-06, "loss": 17.7829, "step": 11874 }, { "epoch": 0.217065457802475, "grad_norm": 8.06071242426235, "learning_rate": 9.110227253804085e-06, "loss": 18.6257, "step": 11875 }, { "epoch": 0.21708373699892153, "grad_norm": 6.432825201440536, "learning_rate": 9.110058691154864e-06, "loss": 17.4202, "step": 11876 }, { "epoch": 0.21710201619536806, "grad_norm": 5.845197097981462, "learning_rate": 9.109890114100242e-06, "loss": 17.3125, "step": 11877 }, { "epoch": 0.21712029539181457, "grad_norm": 7.532775156558808, "learning_rate": 9.109721522640814e-06, "loss": 18.237, "step": 11878 }, { "epoch": 0.2171385745882611, "grad_norm": 6.287582770450092, "learning_rate": 9.109552916777166e-06, "loss": 17.3277, "step": 11879 }, { "epoch": 0.21715685378470762, "grad_norm": 6.93569583883979, "learning_rate": 9.109384296509893e-06, "loss": 17.7749, "step": 11880 }, { "epoch": 0.21717513298115415, "grad_norm": 6.934288715874354, "learning_rate": 9.109215661839585e-06, "loss": 17.8361, "step": 11881 }, { "epoch": 0.2171934121776007, "grad_norm": 7.443478827388052, "learning_rate": 9.109047012766832e-06, "loss": 18.0715, "step": 11882 }, { "epoch": 0.2172116913740472, "grad_norm": 6.191747183977963, "learning_rate": 9.108878349292225e-06, "loss": 17.3165, "step": 11883 }, { "epoch": 0.21722997057049373, "grad_norm": 8.187954700447742, "learning_rate": 9.108709671416357e-06, "loss": 17.983, "step": 11884 }, { "epoch": 0.21724824976694024, "grad_norm": 7.587699318994457, "learning_rate": 9.108540979139818e-06, "loss": 17.8604, "step": 11885 }, { "epoch": 0.21726652896338677, "grad_norm": 6.688204633883863, "learning_rate": 9.1083722724632e-06, "loss": 17.604, "step": 11886 }, { "epoch": 0.21728480815983328, "grad_norm": 7.402876541490754, "learning_rate": 9.108203551387093e-06, "loss": 17.8469, "step": 11887 }, { "epoch": 0.21730308735627982, "grad_norm": 8.390172591765566, "learning_rate": 9.108034815912089e-06, "loss": 18.5591, "step": 11888 }, { "epoch": 0.21732136655272635, "grad_norm": 6.715244974854403, "learning_rate": 9.10786606603878e-06, "loss": 17.9823, "step": 11889 }, { "epoch": 0.21733964574917286, "grad_norm": 7.638039044905897, "learning_rate": 9.107697301767757e-06, "loss": 17.8907, "step": 11890 }, { "epoch": 0.2173579249456194, "grad_norm": 12.617379013698208, "learning_rate": 9.10752852309961e-06, "loss": 19.1123, "step": 11891 }, { "epoch": 0.2173762041420659, "grad_norm": 8.498138539586357, "learning_rate": 9.107359730034932e-06, "loss": 18.0663, "step": 11892 }, { "epoch": 0.21739448333851244, "grad_norm": 6.258669536509158, "learning_rate": 9.107190922574316e-06, "loss": 17.4816, "step": 11893 }, { "epoch": 0.21741276253495898, "grad_norm": 8.139844778110206, "learning_rate": 9.107022100718353e-06, "loss": 18.3541, "step": 11894 }, { "epoch": 0.21743104173140548, "grad_norm": 6.958877476992919, "learning_rate": 9.106853264467632e-06, "loss": 17.824, "step": 11895 }, { "epoch": 0.21744932092785202, "grad_norm": 8.369859115580702, "learning_rate": 9.106684413822746e-06, "loss": 18.3058, "step": 11896 }, { "epoch": 0.21746760012429853, "grad_norm": 6.164079534457812, "learning_rate": 9.106515548784289e-06, "loss": 17.6808, "step": 11897 }, { "epoch": 0.21748587932074506, "grad_norm": 6.402373408976194, "learning_rate": 9.106346669352852e-06, "loss": 17.6395, "step": 11898 }, { "epoch": 0.2175041585171916, "grad_norm": 5.6330556941323975, "learning_rate": 9.106177775529026e-06, "loss": 17.204, "step": 11899 }, { "epoch": 0.2175224377136381, "grad_norm": 6.350200341278443, "learning_rate": 9.106008867313402e-06, "loss": 17.5304, "step": 11900 }, { "epoch": 0.21754071691008464, "grad_norm": 6.8636540147606215, "learning_rate": 9.105839944706573e-06, "loss": 17.6201, "step": 11901 }, { "epoch": 0.21755899610653115, "grad_norm": 6.294765863989509, "learning_rate": 9.105671007709134e-06, "loss": 17.6585, "step": 11902 }, { "epoch": 0.21757727530297769, "grad_norm": 6.703014440079122, "learning_rate": 9.105502056321672e-06, "loss": 17.7441, "step": 11903 }, { "epoch": 0.2175955544994242, "grad_norm": 7.431064453772132, "learning_rate": 9.10533309054478e-06, "loss": 18.0045, "step": 11904 }, { "epoch": 0.21761383369587073, "grad_norm": 5.927868837021644, "learning_rate": 9.105164110379054e-06, "loss": 17.4985, "step": 11905 }, { "epoch": 0.21763211289231726, "grad_norm": 6.456673769313275, "learning_rate": 9.104995115825086e-06, "loss": 17.6168, "step": 11906 }, { "epoch": 0.21765039208876377, "grad_norm": 6.264982376343958, "learning_rate": 9.104826106883463e-06, "loss": 17.1106, "step": 11907 }, { "epoch": 0.2176686712852103, "grad_norm": 6.995548377019063, "learning_rate": 9.104657083554783e-06, "loss": 17.756, "step": 11908 }, { "epoch": 0.21768695048165682, "grad_norm": 6.899534664640044, "learning_rate": 9.104488045839635e-06, "loss": 17.8964, "step": 11909 }, { "epoch": 0.21770522967810335, "grad_norm": 6.974231060934944, "learning_rate": 9.104318993738611e-06, "loss": 17.5191, "step": 11910 }, { "epoch": 0.2177235088745499, "grad_norm": 7.325421162880059, "learning_rate": 9.104149927252308e-06, "loss": 17.8467, "step": 11911 }, { "epoch": 0.2177417880709964, "grad_norm": 5.671400688697253, "learning_rate": 9.103980846381313e-06, "loss": 17.2265, "step": 11912 }, { "epoch": 0.21776006726744293, "grad_norm": 7.071496767851761, "learning_rate": 9.103811751126223e-06, "loss": 17.6563, "step": 11913 }, { "epoch": 0.21777834646388944, "grad_norm": 7.053972963952122, "learning_rate": 9.10364264148763e-06, "loss": 17.7474, "step": 11914 }, { "epoch": 0.21779662566033597, "grad_norm": 6.579193874837373, "learning_rate": 9.103473517466122e-06, "loss": 17.5621, "step": 11915 }, { "epoch": 0.2178149048567825, "grad_norm": 7.67772167225187, "learning_rate": 9.103304379062298e-06, "loss": 17.775, "step": 11916 }, { "epoch": 0.21783318405322902, "grad_norm": 7.306930891834455, "learning_rate": 9.103135226276747e-06, "loss": 17.7983, "step": 11917 }, { "epoch": 0.21785146324967555, "grad_norm": 6.552866014793707, "learning_rate": 9.102966059110065e-06, "loss": 17.3175, "step": 11918 }, { "epoch": 0.21786974244612206, "grad_norm": 7.010313412892092, "learning_rate": 9.10279687756284e-06, "loss": 17.6073, "step": 11919 }, { "epoch": 0.2178880216425686, "grad_norm": 6.072376752884002, "learning_rate": 9.102627681635672e-06, "loss": 17.5278, "step": 11920 }, { "epoch": 0.2179063008390151, "grad_norm": 7.112949881592971, "learning_rate": 9.102458471329147e-06, "loss": 17.6626, "step": 11921 }, { "epoch": 0.21792458003546164, "grad_norm": 7.054496167321983, "learning_rate": 9.102289246643862e-06, "loss": 17.9616, "step": 11922 }, { "epoch": 0.21794285923190818, "grad_norm": 8.625310648247885, "learning_rate": 9.102120007580408e-06, "loss": 18.2248, "step": 11923 }, { "epoch": 0.21796113842835468, "grad_norm": 8.026328983401024, "learning_rate": 9.101950754139381e-06, "loss": 18.2001, "step": 11924 }, { "epoch": 0.21797941762480122, "grad_norm": 6.643022144527676, "learning_rate": 9.101781486321371e-06, "loss": 17.6109, "step": 11925 }, { "epoch": 0.21799769682124773, "grad_norm": 7.646103526543786, "learning_rate": 9.101612204126975e-06, "loss": 17.9194, "step": 11926 }, { "epoch": 0.21801597601769426, "grad_norm": 5.488633472006613, "learning_rate": 9.101442907556782e-06, "loss": 17.4202, "step": 11927 }, { "epoch": 0.2180342552141408, "grad_norm": 7.098617209330299, "learning_rate": 9.101273596611388e-06, "loss": 17.8842, "step": 11928 }, { "epoch": 0.2180525344105873, "grad_norm": 5.565334616556032, "learning_rate": 9.101104271291386e-06, "loss": 17.1628, "step": 11929 }, { "epoch": 0.21807081360703384, "grad_norm": 6.913246404151351, "learning_rate": 9.10093493159737e-06, "loss": 17.8469, "step": 11930 }, { "epoch": 0.21808909280348035, "grad_norm": 8.524606431905655, "learning_rate": 9.100765577529934e-06, "loss": 17.9882, "step": 11931 }, { "epoch": 0.21810737199992689, "grad_norm": 5.645896753841313, "learning_rate": 9.100596209089668e-06, "loss": 17.2779, "step": 11932 }, { "epoch": 0.21812565119637342, "grad_norm": 6.447210336160625, "learning_rate": 9.10042682627717e-06, "loss": 17.4815, "step": 11933 }, { "epoch": 0.21814393039281993, "grad_norm": 6.838627540587328, "learning_rate": 9.100257429093031e-06, "loss": 17.8926, "step": 11934 }, { "epoch": 0.21816220958926646, "grad_norm": 7.473140013540203, "learning_rate": 9.100088017537844e-06, "loss": 17.7996, "step": 11935 }, { "epoch": 0.21818048878571297, "grad_norm": 7.335375339384342, "learning_rate": 9.099918591612207e-06, "loss": 18.0495, "step": 11936 }, { "epoch": 0.2181987679821595, "grad_norm": 7.779144124552107, "learning_rate": 9.09974915131671e-06, "loss": 17.809, "step": 11937 }, { "epoch": 0.21821704717860602, "grad_norm": 6.432819143883393, "learning_rate": 9.099579696651949e-06, "loss": 17.458, "step": 11938 }, { "epoch": 0.21823532637505255, "grad_norm": 6.6374244225346635, "learning_rate": 9.099410227618514e-06, "loss": 17.5334, "step": 11939 }, { "epoch": 0.2182536055714991, "grad_norm": 6.027833915964923, "learning_rate": 9.099240744217005e-06, "loss": 17.3092, "step": 11940 }, { "epoch": 0.2182718847679456, "grad_norm": 7.355883839099431, "learning_rate": 9.099071246448012e-06, "loss": 17.8985, "step": 11941 }, { "epoch": 0.21829016396439213, "grad_norm": 7.36139068850242, "learning_rate": 9.098901734312128e-06, "loss": 17.8174, "step": 11942 }, { "epoch": 0.21830844316083864, "grad_norm": 7.893589632897089, "learning_rate": 9.098732207809951e-06, "loss": 18.1183, "step": 11943 }, { "epoch": 0.21832672235728517, "grad_norm": 7.700565711248756, "learning_rate": 9.098562666942073e-06, "loss": 18.0619, "step": 11944 }, { "epoch": 0.2183450015537317, "grad_norm": 5.125093256215956, "learning_rate": 9.09839311170909e-06, "loss": 16.9294, "step": 11945 }, { "epoch": 0.21836328075017822, "grad_norm": 8.333853040328622, "learning_rate": 9.098223542111593e-06, "loss": 18.1513, "step": 11946 }, { "epoch": 0.21838155994662475, "grad_norm": 8.139593106388162, "learning_rate": 9.098053958150178e-06, "loss": 18.036, "step": 11947 }, { "epoch": 0.21839983914307126, "grad_norm": 7.634997353143541, "learning_rate": 9.09788435982544e-06, "loss": 18.227, "step": 11948 }, { "epoch": 0.2184181183395178, "grad_norm": 6.822586696970956, "learning_rate": 9.097714747137974e-06, "loss": 17.9048, "step": 11949 }, { "epoch": 0.21843639753596433, "grad_norm": 5.224206413541762, "learning_rate": 9.097545120088371e-06, "loss": 17.1113, "step": 11950 }, { "epoch": 0.21845467673241084, "grad_norm": 6.060260021453217, "learning_rate": 9.097375478677228e-06, "loss": 17.4103, "step": 11951 }, { "epoch": 0.21847295592885738, "grad_norm": 5.282185504490752, "learning_rate": 9.097205822905141e-06, "loss": 16.9444, "step": 11952 }, { "epoch": 0.21849123512530388, "grad_norm": 7.1102882167471195, "learning_rate": 9.097036152772703e-06, "loss": 17.9322, "step": 11953 }, { "epoch": 0.21850951432175042, "grad_norm": 7.57731183830557, "learning_rate": 9.09686646828051e-06, "loss": 17.554, "step": 11954 }, { "epoch": 0.21852779351819693, "grad_norm": 6.975978544577265, "learning_rate": 9.096696769429154e-06, "loss": 17.4307, "step": 11955 }, { "epoch": 0.21854607271464346, "grad_norm": 5.449457076461351, "learning_rate": 9.09652705621923e-06, "loss": 17.1161, "step": 11956 }, { "epoch": 0.21856435191109, "grad_norm": 6.9973741598750685, "learning_rate": 9.096357328651337e-06, "loss": 18.023, "step": 11957 }, { "epoch": 0.2185826311075365, "grad_norm": 8.436011815734975, "learning_rate": 9.096187586726064e-06, "loss": 17.7318, "step": 11958 }, { "epoch": 0.21860091030398304, "grad_norm": 7.78096901465431, "learning_rate": 9.09601783044401e-06, "loss": 18.0124, "step": 11959 }, { "epoch": 0.21861918950042955, "grad_norm": 4.859143022196665, "learning_rate": 9.09584805980577e-06, "loss": 16.7077, "step": 11960 }, { "epoch": 0.21863746869687609, "grad_norm": 5.9820407948444725, "learning_rate": 9.095678274811938e-06, "loss": 17.56, "step": 11961 }, { "epoch": 0.21865574789332262, "grad_norm": 6.9941686148540745, "learning_rate": 9.095508475463108e-06, "loss": 17.5618, "step": 11962 }, { "epoch": 0.21867402708976913, "grad_norm": 6.513741789828851, "learning_rate": 9.095338661759879e-06, "loss": 17.6074, "step": 11963 }, { "epoch": 0.21869230628621567, "grad_norm": 6.469130823331481, "learning_rate": 9.09516883370284e-06, "loss": 17.6614, "step": 11964 }, { "epoch": 0.21871058548266217, "grad_norm": 7.339213971916416, "learning_rate": 9.09499899129259e-06, "loss": 17.7564, "step": 11965 }, { "epoch": 0.2187288646791087, "grad_norm": 7.051393632555219, "learning_rate": 9.094829134529726e-06, "loss": 17.7138, "step": 11966 }, { "epoch": 0.21874714387555524, "grad_norm": 6.655667605004137, "learning_rate": 9.094659263414838e-06, "loss": 17.6074, "step": 11967 }, { "epoch": 0.21876542307200175, "grad_norm": 5.697676945789829, "learning_rate": 9.094489377948528e-06, "loss": 17.228, "step": 11968 }, { "epoch": 0.2187837022684483, "grad_norm": 7.858658288547763, "learning_rate": 9.094319478131387e-06, "loss": 18.2695, "step": 11969 }, { "epoch": 0.2188019814648948, "grad_norm": 6.885393288110382, "learning_rate": 9.09414956396401e-06, "loss": 17.5842, "step": 11970 }, { "epoch": 0.21882026066134133, "grad_norm": 7.015131369993315, "learning_rate": 9.093979635446994e-06, "loss": 17.59, "step": 11971 }, { "epoch": 0.21883853985778784, "grad_norm": 7.2145632118926155, "learning_rate": 9.093809692580937e-06, "loss": 17.8017, "step": 11972 }, { "epoch": 0.21885681905423437, "grad_norm": 6.565744924503204, "learning_rate": 9.093639735366431e-06, "loss": 17.5354, "step": 11973 }, { "epoch": 0.2188750982506809, "grad_norm": 7.018434256222989, "learning_rate": 9.093469763804073e-06, "loss": 17.5274, "step": 11974 }, { "epoch": 0.21889337744712742, "grad_norm": 7.556468320286656, "learning_rate": 9.093299777894458e-06, "loss": 17.6066, "step": 11975 }, { "epoch": 0.21891165664357395, "grad_norm": 5.617374179301001, "learning_rate": 9.093129777638183e-06, "loss": 17.1376, "step": 11976 }, { "epoch": 0.21892993584002046, "grad_norm": 8.243979994025754, "learning_rate": 9.092959763035843e-06, "loss": 17.8591, "step": 11977 }, { "epoch": 0.218948215036467, "grad_norm": 6.783744924746423, "learning_rate": 9.092789734088034e-06, "loss": 17.8086, "step": 11978 }, { "epoch": 0.21896649423291353, "grad_norm": 7.659724633477323, "learning_rate": 9.092619690795354e-06, "loss": 18.0718, "step": 11979 }, { "epoch": 0.21898477342936004, "grad_norm": 6.391978236108534, "learning_rate": 9.092449633158395e-06, "loss": 17.5176, "step": 11980 }, { "epoch": 0.21900305262580658, "grad_norm": 6.81729565369, "learning_rate": 9.092279561177758e-06, "loss": 17.6482, "step": 11981 }, { "epoch": 0.21902133182225308, "grad_norm": 6.671017714206008, "learning_rate": 9.092109474854031e-06, "loss": 17.5313, "step": 11982 }, { "epoch": 0.21903961101869962, "grad_norm": 6.339744106747581, "learning_rate": 9.091939374187821e-06, "loss": 17.3638, "step": 11983 }, { "epoch": 0.21905789021514616, "grad_norm": 5.983675068374729, "learning_rate": 9.091769259179715e-06, "loss": 17.229, "step": 11984 }, { "epoch": 0.21907616941159266, "grad_norm": 7.391299335830813, "learning_rate": 9.091599129830313e-06, "loss": 17.9019, "step": 11985 }, { "epoch": 0.2190944486080392, "grad_norm": 6.425807083269966, "learning_rate": 9.091428986140213e-06, "loss": 17.414, "step": 11986 }, { "epoch": 0.2191127278044857, "grad_norm": 5.483876618268429, "learning_rate": 9.091258828110008e-06, "loss": 17.0787, "step": 11987 }, { "epoch": 0.21913100700093224, "grad_norm": 8.051543722042831, "learning_rate": 9.091088655740298e-06, "loss": 17.9003, "step": 11988 }, { "epoch": 0.21914928619737875, "grad_norm": 6.625316110521182, "learning_rate": 9.090918469031676e-06, "loss": 17.5168, "step": 11989 }, { "epoch": 0.2191675653938253, "grad_norm": 6.4224362385000715, "learning_rate": 9.090748267984738e-06, "loss": 17.2853, "step": 11990 }, { "epoch": 0.21918584459027182, "grad_norm": 7.061070925059579, "learning_rate": 9.090578052600082e-06, "loss": 17.6916, "step": 11991 }, { "epoch": 0.21920412378671833, "grad_norm": 6.970553476191607, "learning_rate": 9.090407822878308e-06, "loss": 17.6499, "step": 11992 }, { "epoch": 0.21922240298316487, "grad_norm": 7.158033677684297, "learning_rate": 9.09023757882001e-06, "loss": 17.7665, "step": 11993 }, { "epoch": 0.21924068217961137, "grad_norm": 6.272418194792395, "learning_rate": 9.090067320425782e-06, "loss": 17.4884, "step": 11994 }, { "epoch": 0.2192589613760579, "grad_norm": 7.872646561097602, "learning_rate": 9.089897047696223e-06, "loss": 18.2421, "step": 11995 }, { "epoch": 0.21927724057250444, "grad_norm": 6.128466211050685, "learning_rate": 9.089726760631929e-06, "loss": 17.2306, "step": 11996 }, { "epoch": 0.21929551976895095, "grad_norm": 6.01276693195329, "learning_rate": 9.0895564592335e-06, "loss": 17.2313, "step": 11997 }, { "epoch": 0.2193137989653975, "grad_norm": 7.076087289152299, "learning_rate": 9.089386143501528e-06, "loss": 17.803, "step": 11998 }, { "epoch": 0.219332078161844, "grad_norm": 7.062194248458111, "learning_rate": 9.089215813436614e-06, "loss": 17.7454, "step": 11999 }, { "epoch": 0.21935035735829053, "grad_norm": 5.747517064898039, "learning_rate": 9.089045469039353e-06, "loss": 17.0884, "step": 12000 }, { "epoch": 0.21936863655473707, "grad_norm": 5.707829334885778, "learning_rate": 9.088875110310343e-06, "loss": 17.1666, "step": 12001 }, { "epoch": 0.21938691575118358, "grad_norm": 7.905205923263187, "learning_rate": 9.08870473725018e-06, "loss": 18.4898, "step": 12002 }, { "epoch": 0.2194051949476301, "grad_norm": 7.221712254111787, "learning_rate": 9.088534349859462e-06, "loss": 18.192, "step": 12003 }, { "epoch": 0.21942347414407662, "grad_norm": 7.848459947058525, "learning_rate": 9.088363948138786e-06, "loss": 18.0316, "step": 12004 }, { "epoch": 0.21944175334052315, "grad_norm": 7.562366816646246, "learning_rate": 9.088193532088747e-06, "loss": 18.1611, "step": 12005 }, { "epoch": 0.21946003253696966, "grad_norm": 6.323638346385794, "learning_rate": 9.088023101709946e-06, "loss": 17.3996, "step": 12006 }, { "epoch": 0.2194783117334162, "grad_norm": 7.368769866620946, "learning_rate": 9.08785265700298e-06, "loss": 18.0641, "step": 12007 }, { "epoch": 0.21949659092986273, "grad_norm": 6.762553503819832, "learning_rate": 9.087682197968444e-06, "loss": 17.7905, "step": 12008 }, { "epoch": 0.21951487012630924, "grad_norm": 7.787144860856624, "learning_rate": 9.087511724606936e-06, "loss": 17.965, "step": 12009 }, { "epoch": 0.21953314932275578, "grad_norm": 6.843154924681289, "learning_rate": 9.087341236919055e-06, "loss": 17.6036, "step": 12010 }, { "epoch": 0.21955142851920229, "grad_norm": 6.479468034628051, "learning_rate": 9.087170734905397e-06, "loss": 17.3977, "step": 12011 }, { "epoch": 0.21956970771564882, "grad_norm": 6.161715466614702, "learning_rate": 9.087000218566562e-06, "loss": 17.4078, "step": 12012 }, { "epoch": 0.21958798691209536, "grad_norm": 7.24460315815929, "learning_rate": 9.086829687903144e-06, "loss": 17.5102, "step": 12013 }, { "epoch": 0.21960626610854186, "grad_norm": 7.761451058530412, "learning_rate": 9.086659142915744e-06, "loss": 17.99, "step": 12014 }, { "epoch": 0.2196245453049884, "grad_norm": 5.437564033660677, "learning_rate": 9.086488583604956e-06, "loss": 17.1171, "step": 12015 }, { "epoch": 0.2196428245014349, "grad_norm": 7.523027903179191, "learning_rate": 9.086318009971383e-06, "loss": 17.8936, "step": 12016 }, { "epoch": 0.21966110369788144, "grad_norm": 7.238757594020898, "learning_rate": 9.086147422015617e-06, "loss": 17.6363, "step": 12017 }, { "epoch": 0.21967938289432798, "grad_norm": 6.316033839709729, "learning_rate": 9.085976819738261e-06, "loss": 17.4743, "step": 12018 }, { "epoch": 0.2196976620907745, "grad_norm": 7.593845688623457, "learning_rate": 9.08580620313991e-06, "loss": 17.8691, "step": 12019 }, { "epoch": 0.21971594128722102, "grad_norm": 5.60514414452451, "learning_rate": 9.085635572221163e-06, "loss": 17.2711, "step": 12020 }, { "epoch": 0.21973422048366753, "grad_norm": 6.490622293399584, "learning_rate": 9.08546492698262e-06, "loss": 17.7255, "step": 12021 }, { "epoch": 0.21975249968011407, "grad_norm": 6.909787167782599, "learning_rate": 9.085294267424874e-06, "loss": 17.6954, "step": 12022 }, { "epoch": 0.21977077887656057, "grad_norm": 8.902887335133869, "learning_rate": 9.085123593548526e-06, "loss": 17.6157, "step": 12023 }, { "epoch": 0.2197890580730071, "grad_norm": 5.928585006625503, "learning_rate": 9.084952905354177e-06, "loss": 17.3431, "step": 12024 }, { "epoch": 0.21980733726945365, "grad_norm": 6.630666001768404, "learning_rate": 9.08478220284242e-06, "loss": 17.6577, "step": 12025 }, { "epoch": 0.21982561646590015, "grad_norm": 6.622126466769476, "learning_rate": 9.084611486013857e-06, "loss": 17.795, "step": 12026 }, { "epoch": 0.2198438956623467, "grad_norm": 6.015359889451512, "learning_rate": 9.084440754869085e-06, "loss": 17.2767, "step": 12027 }, { "epoch": 0.2198621748587932, "grad_norm": 5.932151449422499, "learning_rate": 9.084270009408701e-06, "loss": 17.358, "step": 12028 }, { "epoch": 0.21988045405523973, "grad_norm": 6.126648192498557, "learning_rate": 9.084099249633307e-06, "loss": 17.1403, "step": 12029 }, { "epoch": 0.21989873325168627, "grad_norm": 6.819311160305065, "learning_rate": 9.083928475543498e-06, "loss": 17.9101, "step": 12030 }, { "epoch": 0.21991701244813278, "grad_norm": 7.7970206104472615, "learning_rate": 9.083757687139876e-06, "loss": 18.031, "step": 12031 }, { "epoch": 0.2199352916445793, "grad_norm": 8.01782031017913, "learning_rate": 9.083586884423037e-06, "loss": 18.1894, "step": 12032 }, { "epoch": 0.21995357084102582, "grad_norm": 6.155004043893107, "learning_rate": 9.08341606739358e-06, "loss": 17.2879, "step": 12033 }, { "epoch": 0.21997185003747236, "grad_norm": 7.134669392044465, "learning_rate": 9.083245236052103e-06, "loss": 17.8754, "step": 12034 }, { "epoch": 0.2199901292339189, "grad_norm": 9.047554058890503, "learning_rate": 9.083074390399208e-06, "loss": 18.2534, "step": 12035 }, { "epoch": 0.2200084084303654, "grad_norm": 6.879625297549601, "learning_rate": 9.08290353043549e-06, "loss": 17.9168, "step": 12036 }, { "epoch": 0.22002668762681193, "grad_norm": 6.552809811227273, "learning_rate": 9.08273265616155e-06, "loss": 17.797, "step": 12037 }, { "epoch": 0.22004496682325844, "grad_norm": 6.8308385565125285, "learning_rate": 9.082561767577986e-06, "loss": 17.7753, "step": 12038 }, { "epoch": 0.22006324601970498, "grad_norm": 7.742324753665272, "learning_rate": 9.082390864685397e-06, "loss": 17.6787, "step": 12039 }, { "epoch": 0.22008152521615149, "grad_norm": 9.080837449731453, "learning_rate": 9.082219947484383e-06, "loss": 17.7981, "step": 12040 }, { "epoch": 0.22009980441259802, "grad_norm": 6.729636067383213, "learning_rate": 9.082049015975542e-06, "loss": 17.5586, "step": 12041 }, { "epoch": 0.22011808360904456, "grad_norm": 7.289131014769498, "learning_rate": 9.081878070159475e-06, "loss": 17.9244, "step": 12042 }, { "epoch": 0.22013636280549106, "grad_norm": 6.764780826991467, "learning_rate": 9.081707110036777e-06, "loss": 17.8212, "step": 12043 }, { "epoch": 0.2201546420019376, "grad_norm": 4.907240918746769, "learning_rate": 9.081536135608052e-06, "loss": 16.8488, "step": 12044 }, { "epoch": 0.2201729211983841, "grad_norm": 5.758994634397533, "learning_rate": 9.081365146873895e-06, "loss": 17.2962, "step": 12045 }, { "epoch": 0.22019120039483064, "grad_norm": 5.986978869383253, "learning_rate": 9.081194143834908e-06, "loss": 17.2695, "step": 12046 }, { "epoch": 0.22020947959127718, "grad_norm": 6.843056842103075, "learning_rate": 9.08102312649169e-06, "loss": 17.5858, "step": 12047 }, { "epoch": 0.2202277587877237, "grad_norm": 6.659494582954172, "learning_rate": 9.080852094844839e-06, "loss": 17.5676, "step": 12048 }, { "epoch": 0.22024603798417022, "grad_norm": 7.580450822304439, "learning_rate": 9.080681048894957e-06, "loss": 17.8294, "step": 12049 }, { "epoch": 0.22026431718061673, "grad_norm": 6.177606235294419, "learning_rate": 9.080509988642641e-06, "loss": 17.3763, "step": 12050 }, { "epoch": 0.22028259637706327, "grad_norm": 6.720988067004841, "learning_rate": 9.080338914088494e-06, "loss": 17.426, "step": 12051 }, { "epoch": 0.2203008755735098, "grad_norm": 7.5094124068149615, "learning_rate": 9.08016782523311e-06, "loss": 17.7883, "step": 12052 }, { "epoch": 0.2203191547699563, "grad_norm": 7.748640349325527, "learning_rate": 9.079996722077094e-06, "loss": 18.0248, "step": 12053 }, { "epoch": 0.22033743396640285, "grad_norm": 5.936252782832204, "learning_rate": 9.079825604621041e-06, "loss": 17.3862, "step": 12054 }, { "epoch": 0.22035571316284935, "grad_norm": 7.384229712370265, "learning_rate": 9.079654472865556e-06, "loss": 17.8948, "step": 12055 }, { "epoch": 0.2203739923592959, "grad_norm": 6.597747095443318, "learning_rate": 9.079483326811236e-06, "loss": 17.5383, "step": 12056 }, { "epoch": 0.2203922715557424, "grad_norm": 5.842929305692039, "learning_rate": 9.079312166458678e-06, "loss": 17.3827, "step": 12057 }, { "epoch": 0.22041055075218893, "grad_norm": 6.588821379665793, "learning_rate": 9.079140991808488e-06, "loss": 17.5008, "step": 12058 }, { "epoch": 0.22042882994863547, "grad_norm": 6.60250764612619, "learning_rate": 9.078969802861262e-06, "loss": 17.5604, "step": 12059 }, { "epoch": 0.22044710914508198, "grad_norm": 6.2355489714401084, "learning_rate": 9.0787985996176e-06, "loss": 17.0363, "step": 12060 }, { "epoch": 0.2204653883415285, "grad_norm": 6.538423104713626, "learning_rate": 9.078627382078103e-06, "loss": 17.8154, "step": 12061 }, { "epoch": 0.22048366753797502, "grad_norm": 7.022784672294815, "learning_rate": 9.078456150243371e-06, "loss": 17.7635, "step": 12062 }, { "epoch": 0.22050194673442156, "grad_norm": 7.1416528461418, "learning_rate": 9.078284904114005e-06, "loss": 17.7005, "step": 12063 }, { "epoch": 0.2205202259308681, "grad_norm": 6.059971626355186, "learning_rate": 9.078113643690602e-06, "loss": 17.2732, "step": 12064 }, { "epoch": 0.2205385051273146, "grad_norm": 6.5397619475435205, "learning_rate": 9.077942368973767e-06, "loss": 17.8203, "step": 12065 }, { "epoch": 0.22055678432376113, "grad_norm": 6.575952660749232, "learning_rate": 9.077771079964097e-06, "loss": 17.6508, "step": 12066 }, { "epoch": 0.22057506352020764, "grad_norm": 6.674876409356483, "learning_rate": 9.077599776662194e-06, "loss": 17.5094, "step": 12067 }, { "epoch": 0.22059334271665418, "grad_norm": 6.747351806384442, "learning_rate": 9.077428459068656e-06, "loss": 17.6921, "step": 12068 }, { "epoch": 0.22061162191310071, "grad_norm": 8.107350141197397, "learning_rate": 9.077257127184087e-06, "loss": 18.0762, "step": 12069 }, { "epoch": 0.22062990110954722, "grad_norm": 7.087594877867561, "learning_rate": 9.077085781009084e-06, "loss": 17.6351, "step": 12070 }, { "epoch": 0.22064818030599376, "grad_norm": 7.426294762385301, "learning_rate": 9.07691442054425e-06, "loss": 17.9902, "step": 12071 }, { "epoch": 0.22066645950244027, "grad_norm": 6.433734635874548, "learning_rate": 9.076743045790184e-06, "loss": 17.5732, "step": 12072 }, { "epoch": 0.2206847386988868, "grad_norm": 7.418091579853038, "learning_rate": 9.076571656747488e-06, "loss": 17.8136, "step": 12073 }, { "epoch": 0.2207030178953333, "grad_norm": 8.994427748065133, "learning_rate": 9.076400253416762e-06, "loss": 18.7329, "step": 12074 }, { "epoch": 0.22072129709177984, "grad_norm": 6.890757558467735, "learning_rate": 9.076228835798606e-06, "loss": 17.6003, "step": 12075 }, { "epoch": 0.22073957628822638, "grad_norm": 4.939340614448473, "learning_rate": 9.076057403893624e-06, "loss": 16.8055, "step": 12076 }, { "epoch": 0.2207578554846729, "grad_norm": 5.740784737536925, "learning_rate": 9.075885957702411e-06, "loss": 17.1904, "step": 12077 }, { "epoch": 0.22077613468111942, "grad_norm": 5.982820156246118, "learning_rate": 9.075714497225574e-06, "loss": 17.4779, "step": 12078 }, { "epoch": 0.22079441387756593, "grad_norm": 6.580558522174517, "learning_rate": 9.075543022463711e-06, "loss": 17.7942, "step": 12079 }, { "epoch": 0.22081269307401247, "grad_norm": 6.624701415458696, "learning_rate": 9.075371533417423e-06, "loss": 17.4807, "step": 12080 }, { "epoch": 0.220830972270459, "grad_norm": 6.630159754538874, "learning_rate": 9.07520003008731e-06, "loss": 17.6889, "step": 12081 }, { "epoch": 0.2208492514669055, "grad_norm": 6.679633768938046, "learning_rate": 9.075028512473976e-06, "loss": 17.7805, "step": 12082 }, { "epoch": 0.22086753066335205, "grad_norm": 7.804806343191544, "learning_rate": 9.074856980578022e-06, "loss": 17.8208, "step": 12083 }, { "epoch": 0.22088580985979855, "grad_norm": 7.006482526280416, "learning_rate": 9.074685434400046e-06, "loss": 18.0162, "step": 12084 }, { "epoch": 0.2209040890562451, "grad_norm": 7.047766027138001, "learning_rate": 9.074513873940651e-06, "loss": 17.4851, "step": 12085 }, { "epoch": 0.22092236825269163, "grad_norm": 6.3626919655071035, "learning_rate": 9.07434229920044e-06, "loss": 17.3642, "step": 12086 }, { "epoch": 0.22094064744913813, "grad_norm": 7.247062356212498, "learning_rate": 9.07417071018001e-06, "loss": 18.0834, "step": 12087 }, { "epoch": 0.22095892664558467, "grad_norm": 5.822405120819056, "learning_rate": 9.07399910687997e-06, "loss": 17.1425, "step": 12088 }, { "epoch": 0.22097720584203118, "grad_norm": 7.587197075988354, "learning_rate": 9.073827489300913e-06, "loss": 17.8303, "step": 12089 }, { "epoch": 0.2209954850384777, "grad_norm": 6.068602889869737, "learning_rate": 9.073655857443444e-06, "loss": 17.369, "step": 12090 }, { "epoch": 0.22101376423492422, "grad_norm": 7.393926217882674, "learning_rate": 9.073484211308166e-06, "loss": 17.5568, "step": 12091 }, { "epoch": 0.22103204343137076, "grad_norm": 7.637252425495857, "learning_rate": 9.073312550895678e-06, "loss": 17.6237, "step": 12092 }, { "epoch": 0.2210503226278173, "grad_norm": 6.042011046293065, "learning_rate": 9.073140876206585e-06, "loss": 17.231, "step": 12093 }, { "epoch": 0.2210686018242638, "grad_norm": 6.446392263232922, "learning_rate": 9.072969187241484e-06, "loss": 17.4578, "step": 12094 }, { "epoch": 0.22108688102071034, "grad_norm": 6.754235291343457, "learning_rate": 9.072797484000983e-06, "loss": 17.2657, "step": 12095 }, { "epoch": 0.22110516021715684, "grad_norm": 7.994525310983319, "learning_rate": 9.072625766485678e-06, "loss": 18.352, "step": 12096 }, { "epoch": 0.22112343941360338, "grad_norm": 6.907998648106382, "learning_rate": 9.072454034696173e-06, "loss": 17.8207, "step": 12097 }, { "epoch": 0.22114171861004991, "grad_norm": 7.5304715476577275, "learning_rate": 9.07228228863307e-06, "loss": 18.1671, "step": 12098 }, { "epoch": 0.22115999780649642, "grad_norm": 6.864295122027136, "learning_rate": 9.072110528296971e-06, "loss": 17.8149, "step": 12099 }, { "epoch": 0.22117827700294296, "grad_norm": 6.496031378288208, "learning_rate": 9.07193875368848e-06, "loss": 17.5587, "step": 12100 }, { "epoch": 0.22119655619938947, "grad_norm": 6.7719935577797, "learning_rate": 9.071766964808193e-06, "loss": 17.8892, "step": 12101 }, { "epoch": 0.221214835395836, "grad_norm": 6.019303894423978, "learning_rate": 9.071595161656718e-06, "loss": 17.6829, "step": 12102 }, { "epoch": 0.22123311459228254, "grad_norm": 7.398712332894582, "learning_rate": 9.071423344234658e-06, "loss": 18.0845, "step": 12103 }, { "epoch": 0.22125139378872904, "grad_norm": 5.725552928569081, "learning_rate": 9.07125151254261e-06, "loss": 17.1853, "step": 12104 }, { "epoch": 0.22126967298517558, "grad_norm": 5.828800196676859, "learning_rate": 9.071079666581178e-06, "loss": 17.1133, "step": 12105 }, { "epoch": 0.2212879521816221, "grad_norm": 5.561468258385647, "learning_rate": 9.070907806350965e-06, "loss": 16.9982, "step": 12106 }, { "epoch": 0.22130623137806862, "grad_norm": 6.118137601073453, "learning_rate": 9.070735931852575e-06, "loss": 17.4773, "step": 12107 }, { "epoch": 0.22132451057451513, "grad_norm": 6.723948730169479, "learning_rate": 9.07056404308661e-06, "loss": 17.8599, "step": 12108 }, { "epoch": 0.22134278977096167, "grad_norm": 6.251266020611498, "learning_rate": 9.070392140053667e-06, "loss": 17.5272, "step": 12109 }, { "epoch": 0.2213610689674082, "grad_norm": 5.895701484924443, "learning_rate": 9.070220222754356e-06, "loss": 17.346, "step": 12110 }, { "epoch": 0.2213793481638547, "grad_norm": 8.795498461265977, "learning_rate": 9.070048291189276e-06, "loss": 18.1511, "step": 12111 }, { "epoch": 0.22139762736030125, "grad_norm": 7.085636395269582, "learning_rate": 9.06987634535903e-06, "loss": 18.2111, "step": 12112 }, { "epoch": 0.22141590655674775, "grad_norm": 5.200696641321617, "learning_rate": 9.06970438526422e-06, "loss": 17.1305, "step": 12113 }, { "epoch": 0.2214341857531943, "grad_norm": 6.834922203306733, "learning_rate": 9.069532410905448e-06, "loss": 17.7227, "step": 12114 }, { "epoch": 0.22145246494964083, "grad_norm": 6.393837202331609, "learning_rate": 9.06936042228332e-06, "loss": 17.3959, "step": 12115 }, { "epoch": 0.22147074414608733, "grad_norm": 5.5328302722177085, "learning_rate": 9.069188419398437e-06, "loss": 17.0514, "step": 12116 }, { "epoch": 0.22148902334253387, "grad_norm": 8.510829456601536, "learning_rate": 9.0690164022514e-06, "loss": 18.1078, "step": 12117 }, { "epoch": 0.22150730253898038, "grad_norm": 5.904706526852767, "learning_rate": 9.068844370842812e-06, "loss": 17.2968, "step": 12118 }, { "epoch": 0.2215255817354269, "grad_norm": 6.622754466255814, "learning_rate": 9.068672325173282e-06, "loss": 17.389, "step": 12119 }, { "epoch": 0.22154386093187345, "grad_norm": 6.508877281910572, "learning_rate": 9.068500265243407e-06, "loss": 17.6178, "step": 12120 }, { "epoch": 0.22156214012831996, "grad_norm": 5.864018435445338, "learning_rate": 9.06832819105379e-06, "loss": 17.1021, "step": 12121 }, { "epoch": 0.2215804193247665, "grad_norm": 6.403591175170202, "learning_rate": 9.068156102605037e-06, "loss": 17.4996, "step": 12122 }, { "epoch": 0.221598698521213, "grad_norm": 6.990439120209378, "learning_rate": 9.067983999897751e-06, "loss": 17.4864, "step": 12123 }, { "epoch": 0.22161697771765954, "grad_norm": 6.4612132220284915, "learning_rate": 9.067811882932533e-06, "loss": 17.3983, "step": 12124 }, { "epoch": 0.22163525691410604, "grad_norm": 5.932857378409871, "learning_rate": 9.067639751709987e-06, "loss": 17.5504, "step": 12125 }, { "epoch": 0.22165353611055258, "grad_norm": 6.832095320539582, "learning_rate": 9.067467606230717e-06, "loss": 17.6407, "step": 12126 }, { "epoch": 0.22167181530699911, "grad_norm": 5.785721329037084, "learning_rate": 9.067295446495326e-06, "loss": 17.2609, "step": 12127 }, { "epoch": 0.22169009450344562, "grad_norm": 6.631662600339456, "learning_rate": 9.067123272504417e-06, "loss": 17.3671, "step": 12128 }, { "epoch": 0.22170837369989216, "grad_norm": 7.589981984568511, "learning_rate": 9.066951084258593e-06, "loss": 18.2443, "step": 12129 }, { "epoch": 0.22172665289633867, "grad_norm": 5.525196115915716, "learning_rate": 9.06677888175846e-06, "loss": 16.9806, "step": 12130 }, { "epoch": 0.2217449320927852, "grad_norm": 6.955554771941406, "learning_rate": 9.06660666500462e-06, "loss": 17.662, "step": 12131 }, { "epoch": 0.22176321128923174, "grad_norm": 5.991003541933081, "learning_rate": 9.066434433997674e-06, "loss": 17.4138, "step": 12132 }, { "epoch": 0.22178149048567825, "grad_norm": 7.893901139197201, "learning_rate": 9.06626218873823e-06, "loss": 18.0052, "step": 12133 }, { "epoch": 0.22179976968212478, "grad_norm": 7.379640206812513, "learning_rate": 9.066089929226891e-06, "loss": 18.0161, "step": 12134 }, { "epoch": 0.2218180488785713, "grad_norm": 5.8641168083447806, "learning_rate": 9.065917655464258e-06, "loss": 17.3031, "step": 12135 }, { "epoch": 0.22183632807501782, "grad_norm": 6.350075766092297, "learning_rate": 9.065745367450938e-06, "loss": 17.3539, "step": 12136 }, { "epoch": 0.22185460727146436, "grad_norm": 7.8634994124227955, "learning_rate": 9.065573065187531e-06, "loss": 17.9024, "step": 12137 }, { "epoch": 0.22187288646791087, "grad_norm": 5.082605456754155, "learning_rate": 9.065400748674646e-06, "loss": 16.9995, "step": 12138 }, { "epoch": 0.2218911656643574, "grad_norm": 9.111701523707396, "learning_rate": 9.065228417912882e-06, "loss": 18.4247, "step": 12139 }, { "epoch": 0.2219094448608039, "grad_norm": 7.461955655034305, "learning_rate": 9.065056072902847e-06, "loss": 17.6911, "step": 12140 }, { "epoch": 0.22192772405725045, "grad_norm": 5.56744554253036, "learning_rate": 9.06488371364514e-06, "loss": 17.2422, "step": 12141 }, { "epoch": 0.22194600325369696, "grad_norm": 6.874676922587669, "learning_rate": 9.064711340140373e-06, "loss": 17.7655, "step": 12142 }, { "epoch": 0.2219642824501435, "grad_norm": 6.967705772045241, "learning_rate": 9.064538952389141e-06, "loss": 17.7408, "step": 12143 }, { "epoch": 0.22198256164659003, "grad_norm": 5.7354699215764136, "learning_rate": 9.064366550392056e-06, "loss": 17.2371, "step": 12144 }, { "epoch": 0.22200084084303653, "grad_norm": 6.379178656800686, "learning_rate": 9.064194134149718e-06, "loss": 17.4984, "step": 12145 }, { "epoch": 0.22201912003948307, "grad_norm": 6.637973537030778, "learning_rate": 9.064021703662732e-06, "loss": 17.7825, "step": 12146 }, { "epoch": 0.22203739923592958, "grad_norm": 8.032869745002564, "learning_rate": 9.0638492589317e-06, "loss": 18.2834, "step": 12147 }, { "epoch": 0.2220556784323761, "grad_norm": 6.144100501655099, "learning_rate": 9.063676799957231e-06, "loss": 17.0968, "step": 12148 }, { "epoch": 0.22207395762882265, "grad_norm": 6.762697195423152, "learning_rate": 9.063504326739929e-06, "loss": 17.5261, "step": 12149 }, { "epoch": 0.22209223682526916, "grad_norm": 5.334294547815651, "learning_rate": 9.063331839280395e-06, "loss": 17.4522, "step": 12150 }, { "epoch": 0.2221105160217157, "grad_norm": 6.868929711870742, "learning_rate": 9.063159337579238e-06, "loss": 17.7483, "step": 12151 }, { "epoch": 0.2221287952181622, "grad_norm": 6.366454532936392, "learning_rate": 9.062986821637056e-06, "loss": 17.6542, "step": 12152 }, { "epoch": 0.22214707441460874, "grad_norm": 7.9848156345152255, "learning_rate": 9.06281429145446e-06, "loss": 18.2242, "step": 12153 }, { "epoch": 0.22216535361105527, "grad_norm": 6.573086364479184, "learning_rate": 9.062641747032052e-06, "loss": 17.4293, "step": 12154 }, { "epoch": 0.22218363280750178, "grad_norm": 6.234651122800711, "learning_rate": 9.06246918837044e-06, "loss": 17.3009, "step": 12155 }, { "epoch": 0.22220191200394832, "grad_norm": 6.491836262913137, "learning_rate": 9.062296615470223e-06, "loss": 17.6475, "step": 12156 }, { "epoch": 0.22222019120039482, "grad_norm": 5.141991111949409, "learning_rate": 9.062124028332008e-06, "loss": 16.8352, "step": 12157 }, { "epoch": 0.22223847039684136, "grad_norm": 7.045667001321327, "learning_rate": 9.061951426956403e-06, "loss": 17.8412, "step": 12158 }, { "epoch": 0.22225674959328787, "grad_norm": 7.651692323400573, "learning_rate": 9.06177881134401e-06, "loss": 18.0355, "step": 12159 }, { "epoch": 0.2222750287897344, "grad_norm": 7.487448585021054, "learning_rate": 9.061606181495436e-06, "loss": 17.893, "step": 12160 }, { "epoch": 0.22229330798618094, "grad_norm": 7.015425481940219, "learning_rate": 9.061433537411285e-06, "loss": 18.1156, "step": 12161 }, { "epoch": 0.22231158718262745, "grad_norm": 6.962871882681409, "learning_rate": 9.06126087909216e-06, "loss": 17.9275, "step": 12162 }, { "epoch": 0.22232986637907398, "grad_norm": 6.817616740706698, "learning_rate": 9.061088206538668e-06, "loss": 17.5149, "step": 12163 }, { "epoch": 0.2223481455755205, "grad_norm": 6.509896268933717, "learning_rate": 9.060915519751415e-06, "loss": 17.6248, "step": 12164 }, { "epoch": 0.22236642477196703, "grad_norm": 6.315903827237091, "learning_rate": 9.060742818731006e-06, "loss": 17.1711, "step": 12165 }, { "epoch": 0.22238470396841356, "grad_norm": 6.4543926230363935, "learning_rate": 9.060570103478043e-06, "loss": 17.5929, "step": 12166 }, { "epoch": 0.22240298316486007, "grad_norm": 7.083003033265911, "learning_rate": 9.060397373993138e-06, "loss": 18.1269, "step": 12167 }, { "epoch": 0.2224212623613066, "grad_norm": 6.918556123420934, "learning_rate": 9.06022463027689e-06, "loss": 17.7514, "step": 12168 }, { "epoch": 0.2224395415577531, "grad_norm": 6.13862923828019, "learning_rate": 9.060051872329907e-06, "loss": 17.3936, "step": 12169 }, { "epoch": 0.22245782075419965, "grad_norm": 7.548290244094069, "learning_rate": 9.059879100152795e-06, "loss": 18.0276, "step": 12170 }, { "epoch": 0.22247609995064618, "grad_norm": 6.654352827636281, "learning_rate": 9.05970631374616e-06, "loss": 17.6661, "step": 12171 }, { "epoch": 0.2224943791470927, "grad_norm": 6.23502675325729, "learning_rate": 9.059533513110605e-06, "loss": 17.4606, "step": 12172 }, { "epoch": 0.22251265834353923, "grad_norm": 7.595924061961402, "learning_rate": 9.05936069824674e-06, "loss": 18.3619, "step": 12173 }, { "epoch": 0.22253093753998573, "grad_norm": 5.993091462334962, "learning_rate": 9.059187869155167e-06, "loss": 17.4158, "step": 12174 }, { "epoch": 0.22254921673643227, "grad_norm": 5.892020062009522, "learning_rate": 9.05901502583649e-06, "loss": 17.3697, "step": 12175 }, { "epoch": 0.22256749593287878, "grad_norm": 4.909215357289737, "learning_rate": 9.05884216829132e-06, "loss": 16.8452, "step": 12176 }, { "epoch": 0.22258577512932531, "grad_norm": 6.505330308008546, "learning_rate": 9.05866929652026e-06, "loss": 17.6892, "step": 12177 }, { "epoch": 0.22260405432577185, "grad_norm": 7.260690227232895, "learning_rate": 9.058496410523917e-06, "loss": 18.0694, "step": 12178 }, { "epoch": 0.22262233352221836, "grad_norm": 6.559486921726839, "learning_rate": 9.058323510302896e-06, "loss": 17.6832, "step": 12179 }, { "epoch": 0.2226406127186649, "grad_norm": 6.477247629775747, "learning_rate": 9.058150595857803e-06, "loss": 17.4083, "step": 12180 }, { "epoch": 0.2226588919151114, "grad_norm": 6.222961774733105, "learning_rate": 9.057977667189244e-06, "loss": 17.6799, "step": 12181 }, { "epoch": 0.22267717111155794, "grad_norm": 7.0625027289353275, "learning_rate": 9.057804724297825e-06, "loss": 17.8661, "step": 12182 }, { "epoch": 0.22269545030800447, "grad_norm": 6.206804862803218, "learning_rate": 9.057631767184153e-06, "loss": 17.3949, "step": 12183 }, { "epoch": 0.22271372950445098, "grad_norm": 6.557056824978244, "learning_rate": 9.057458795848834e-06, "loss": 17.5059, "step": 12184 }, { "epoch": 0.22273200870089752, "grad_norm": 6.20159662590665, "learning_rate": 9.057285810292474e-06, "loss": 17.278, "step": 12185 }, { "epoch": 0.22275028789734402, "grad_norm": 6.695076245431577, "learning_rate": 9.057112810515681e-06, "loss": 17.743, "step": 12186 }, { "epoch": 0.22276856709379056, "grad_norm": 6.108160305567904, "learning_rate": 9.056939796519056e-06, "loss": 17.4396, "step": 12187 }, { "epoch": 0.2227868462902371, "grad_norm": 9.339105529261538, "learning_rate": 9.056766768303212e-06, "loss": 18.6124, "step": 12188 }, { "epoch": 0.2228051254866836, "grad_norm": 5.530740482200743, "learning_rate": 9.056593725868752e-06, "loss": 16.9295, "step": 12189 }, { "epoch": 0.22282340468313014, "grad_norm": 6.706535134975608, "learning_rate": 9.056420669216281e-06, "loss": 17.8878, "step": 12190 }, { "epoch": 0.22284168387957665, "grad_norm": 5.2416419568065145, "learning_rate": 9.05624759834641e-06, "loss": 17.0756, "step": 12191 }, { "epoch": 0.22285996307602318, "grad_norm": 7.805341729400662, "learning_rate": 9.056074513259742e-06, "loss": 18.2496, "step": 12192 }, { "epoch": 0.2228782422724697, "grad_norm": 7.356348571563792, "learning_rate": 9.055901413956885e-06, "loss": 17.8351, "step": 12193 }, { "epoch": 0.22289652146891623, "grad_norm": 6.1622294015402845, "learning_rate": 9.055728300438445e-06, "loss": 17.1803, "step": 12194 }, { "epoch": 0.22291480066536276, "grad_norm": 6.4323271410674945, "learning_rate": 9.05555517270503e-06, "loss": 17.4918, "step": 12195 }, { "epoch": 0.22293307986180927, "grad_norm": 7.902888915890806, "learning_rate": 9.055382030757244e-06, "loss": 18.0494, "step": 12196 }, { "epoch": 0.2229513590582558, "grad_norm": 14.911929793128163, "learning_rate": 9.0552088745957e-06, "loss": 17.9418, "step": 12197 }, { "epoch": 0.2229696382547023, "grad_norm": 8.190149957916722, "learning_rate": 9.055035704220998e-06, "loss": 18.1531, "step": 12198 }, { "epoch": 0.22298791745114885, "grad_norm": 6.5247192925214375, "learning_rate": 9.054862519633749e-06, "loss": 17.3929, "step": 12199 }, { "epoch": 0.22300619664759538, "grad_norm": 5.65360135672644, "learning_rate": 9.054689320834557e-06, "loss": 16.9583, "step": 12200 }, { "epoch": 0.2230244758440419, "grad_norm": 6.677669658979401, "learning_rate": 9.054516107824031e-06, "loss": 17.7014, "step": 12201 }, { "epoch": 0.22304275504048843, "grad_norm": 6.8350644668940514, "learning_rate": 9.05434288060278e-06, "loss": 17.5601, "step": 12202 }, { "epoch": 0.22306103423693494, "grad_norm": 7.85227969398357, "learning_rate": 9.054169639171407e-06, "loss": 17.7565, "step": 12203 }, { "epoch": 0.22307931343338147, "grad_norm": 6.945638606751802, "learning_rate": 9.05399638353052e-06, "loss": 17.969, "step": 12204 }, { "epoch": 0.223097592629828, "grad_norm": 6.036209937688848, "learning_rate": 9.053823113680731e-06, "loss": 17.3046, "step": 12205 }, { "epoch": 0.22311587182627451, "grad_norm": 6.685623520160601, "learning_rate": 9.053649829622642e-06, "loss": 18.1842, "step": 12206 }, { "epoch": 0.22313415102272105, "grad_norm": 5.7246782458163965, "learning_rate": 9.053476531356861e-06, "loss": 17.2299, "step": 12207 }, { "epoch": 0.22315243021916756, "grad_norm": 7.408503539027606, "learning_rate": 9.053303218883998e-06, "loss": 18.1562, "step": 12208 }, { "epoch": 0.2231707094156141, "grad_norm": 6.427588789991955, "learning_rate": 9.05312989220466e-06, "loss": 17.3499, "step": 12209 }, { "epoch": 0.2231889886120606, "grad_norm": 6.795794685545221, "learning_rate": 9.052956551319452e-06, "loss": 17.3862, "step": 12210 }, { "epoch": 0.22320726780850714, "grad_norm": 7.06147244109122, "learning_rate": 9.052783196228983e-06, "loss": 17.317, "step": 12211 }, { "epoch": 0.22322554700495367, "grad_norm": 6.7282083201337315, "learning_rate": 9.05260982693386e-06, "loss": 17.6491, "step": 12212 }, { "epoch": 0.22324382620140018, "grad_norm": 5.4593299113472495, "learning_rate": 9.05243644343469e-06, "loss": 17.2507, "step": 12213 }, { "epoch": 0.22326210539784672, "grad_norm": 6.109716435642244, "learning_rate": 9.052263045732087e-06, "loss": 17.6383, "step": 12214 }, { "epoch": 0.22328038459429322, "grad_norm": 8.530470453759085, "learning_rate": 9.05208963382665e-06, "loss": 17.5968, "step": 12215 }, { "epoch": 0.22329866379073976, "grad_norm": 6.024941972181284, "learning_rate": 9.05191620771899e-06, "loss": 17.3743, "step": 12216 }, { "epoch": 0.2233169429871863, "grad_norm": 5.780308959116288, "learning_rate": 9.051742767409716e-06, "loss": 16.8978, "step": 12217 }, { "epoch": 0.2233352221836328, "grad_norm": 7.2216651620068735, "learning_rate": 9.051569312899436e-06, "loss": 17.4587, "step": 12218 }, { "epoch": 0.22335350138007934, "grad_norm": 6.794678175019179, "learning_rate": 9.051395844188755e-06, "loss": 17.5773, "step": 12219 }, { "epoch": 0.22337178057652585, "grad_norm": 6.631565450155475, "learning_rate": 9.051222361278286e-06, "loss": 17.563, "step": 12220 }, { "epoch": 0.22339005977297238, "grad_norm": 7.237088734195267, "learning_rate": 9.051048864168632e-06, "loss": 17.6461, "step": 12221 }, { "epoch": 0.22340833896941892, "grad_norm": 5.937021023793204, "learning_rate": 9.050875352860404e-06, "loss": 17.3293, "step": 12222 }, { "epoch": 0.22342661816586543, "grad_norm": 8.196093042757301, "learning_rate": 9.050701827354211e-06, "loss": 17.6361, "step": 12223 }, { "epoch": 0.22344489736231196, "grad_norm": 7.425693450852276, "learning_rate": 9.050528287650657e-06, "loss": 17.8749, "step": 12224 }, { "epoch": 0.22346317655875847, "grad_norm": 5.215058225206581, "learning_rate": 9.050354733750354e-06, "loss": 17.0018, "step": 12225 }, { "epoch": 0.223481455755205, "grad_norm": 5.787247505005862, "learning_rate": 9.05018116565391e-06, "loss": 17.2517, "step": 12226 }, { "epoch": 0.2234997349516515, "grad_norm": 6.5409886645510635, "learning_rate": 9.05000758336193e-06, "loss": 17.6792, "step": 12227 }, { "epoch": 0.22351801414809805, "grad_norm": 8.445251899757077, "learning_rate": 9.049833986875027e-06, "loss": 17.5437, "step": 12228 }, { "epoch": 0.22353629334454458, "grad_norm": 6.821241351948771, "learning_rate": 9.049660376193808e-06, "loss": 17.546, "step": 12229 }, { "epoch": 0.2235545725409911, "grad_norm": 7.823994149490272, "learning_rate": 9.049486751318879e-06, "loss": 17.9458, "step": 12230 }, { "epoch": 0.22357285173743763, "grad_norm": 7.369459589380154, "learning_rate": 9.04931311225085e-06, "loss": 18.0869, "step": 12231 }, { "epoch": 0.22359113093388414, "grad_norm": 7.6500548569966185, "learning_rate": 9.04913945899033e-06, "loss": 17.8878, "step": 12232 }, { "epoch": 0.22360941013033067, "grad_norm": 5.837867098791316, "learning_rate": 9.048965791537929e-06, "loss": 17.042, "step": 12233 }, { "epoch": 0.2236276893267772, "grad_norm": 6.050463393955847, "learning_rate": 9.048792109894253e-06, "loss": 17.1248, "step": 12234 }, { "epoch": 0.22364596852322371, "grad_norm": 8.164927438234022, "learning_rate": 9.048618414059912e-06, "loss": 18.2822, "step": 12235 }, { "epoch": 0.22366424771967025, "grad_norm": 7.768789550138342, "learning_rate": 9.048444704035517e-06, "loss": 18.2395, "step": 12236 }, { "epoch": 0.22368252691611676, "grad_norm": 5.907906867932357, "learning_rate": 9.048270979821673e-06, "loss": 17.217, "step": 12237 }, { "epoch": 0.2237008061125633, "grad_norm": 6.668599665032728, "learning_rate": 9.04809724141899e-06, "loss": 17.6935, "step": 12238 }, { "epoch": 0.22371908530900983, "grad_norm": 6.426863155040541, "learning_rate": 9.047923488828079e-06, "loss": 17.4179, "step": 12239 }, { "epoch": 0.22373736450545634, "grad_norm": 7.489766139729761, "learning_rate": 9.047749722049545e-06, "loss": 17.9008, "step": 12240 }, { "epoch": 0.22375564370190287, "grad_norm": 7.03745251707885, "learning_rate": 9.047575941084002e-06, "loss": 17.8194, "step": 12241 }, { "epoch": 0.22377392289834938, "grad_norm": 8.213014293127971, "learning_rate": 9.047402145932055e-06, "loss": 18.2351, "step": 12242 }, { "epoch": 0.22379220209479592, "grad_norm": 6.809378559607105, "learning_rate": 9.047228336594315e-06, "loss": 17.6302, "step": 12243 }, { "epoch": 0.22381048129124242, "grad_norm": 7.37470269898717, "learning_rate": 9.047054513071391e-06, "loss": 17.9828, "step": 12244 }, { "epoch": 0.22382876048768896, "grad_norm": 11.370524647387033, "learning_rate": 9.046880675363892e-06, "loss": 17.5631, "step": 12245 }, { "epoch": 0.2238470396841355, "grad_norm": 7.798356886013414, "learning_rate": 9.046706823472428e-06, "loss": 18.0636, "step": 12246 }, { "epoch": 0.223865318880582, "grad_norm": 7.08648155761893, "learning_rate": 9.046532957397606e-06, "loss": 17.9467, "step": 12247 }, { "epoch": 0.22388359807702854, "grad_norm": 5.435527706552968, "learning_rate": 9.046359077140039e-06, "loss": 17.0462, "step": 12248 }, { "epoch": 0.22390187727347505, "grad_norm": 6.68293259405961, "learning_rate": 9.046185182700333e-06, "loss": 17.7712, "step": 12249 }, { "epoch": 0.22392015646992158, "grad_norm": 5.368867604488464, "learning_rate": 9.0460112740791e-06, "loss": 17.112, "step": 12250 }, { "epoch": 0.22393843566636812, "grad_norm": 5.77104810479662, "learning_rate": 9.045837351276949e-06, "loss": 17.4235, "step": 12251 }, { "epoch": 0.22395671486281463, "grad_norm": 6.159005156455112, "learning_rate": 9.04566341429449e-06, "loss": 17.606, "step": 12252 }, { "epoch": 0.22397499405926116, "grad_norm": 6.8603474478294215, "learning_rate": 9.04548946313233e-06, "loss": 17.6305, "step": 12253 }, { "epoch": 0.22399327325570767, "grad_norm": 6.893380745047261, "learning_rate": 9.04531549779108e-06, "loss": 17.9018, "step": 12254 }, { "epoch": 0.2240115524521542, "grad_norm": 7.395851504751612, "learning_rate": 9.045141518271352e-06, "loss": 18.0254, "step": 12255 }, { "epoch": 0.22402983164860074, "grad_norm": 7.459036947022944, "learning_rate": 9.044967524573754e-06, "loss": 17.4971, "step": 12256 }, { "epoch": 0.22404811084504725, "grad_norm": 7.4754209209697455, "learning_rate": 9.044793516698894e-06, "loss": 17.8842, "step": 12257 }, { "epoch": 0.22406639004149378, "grad_norm": 6.060827315306458, "learning_rate": 9.044619494647383e-06, "loss": 17.2545, "step": 12258 }, { "epoch": 0.2240846692379403, "grad_norm": 9.437373283210361, "learning_rate": 9.044445458419834e-06, "loss": 18.409, "step": 12259 }, { "epoch": 0.22410294843438683, "grad_norm": 7.125314012595321, "learning_rate": 9.044271408016856e-06, "loss": 17.8622, "step": 12260 }, { "epoch": 0.22412122763083334, "grad_norm": 6.546315968350545, "learning_rate": 9.044097343439055e-06, "loss": 17.2875, "step": 12261 }, { "epoch": 0.22413950682727987, "grad_norm": 6.622025460441975, "learning_rate": 9.043923264687045e-06, "loss": 17.3571, "step": 12262 }, { "epoch": 0.2241577860237264, "grad_norm": 7.090160753684515, "learning_rate": 9.043749171761433e-06, "loss": 17.9867, "step": 12263 }, { "epoch": 0.22417606522017292, "grad_norm": 6.387876368059813, "learning_rate": 9.043575064662833e-06, "loss": 17.4131, "step": 12264 }, { "epoch": 0.22419434441661945, "grad_norm": 5.936893993367334, "learning_rate": 9.043400943391853e-06, "loss": 17.2094, "step": 12265 }, { "epoch": 0.22421262361306596, "grad_norm": 6.055384088286888, "learning_rate": 9.043226807949103e-06, "loss": 17.3158, "step": 12266 }, { "epoch": 0.2242309028095125, "grad_norm": 7.2211548054105, "learning_rate": 9.043052658335195e-06, "loss": 17.7897, "step": 12267 }, { "epoch": 0.22424918200595903, "grad_norm": 6.633570997443187, "learning_rate": 9.042878494550736e-06, "loss": 17.3886, "step": 12268 }, { "epoch": 0.22426746120240554, "grad_norm": 6.899123000691209, "learning_rate": 9.04270431659634e-06, "loss": 17.9087, "step": 12269 }, { "epoch": 0.22428574039885207, "grad_norm": 6.548213007922687, "learning_rate": 9.042530124472617e-06, "loss": 17.3299, "step": 12270 }, { "epoch": 0.22430401959529858, "grad_norm": 6.58743098605056, "learning_rate": 9.042355918180176e-06, "loss": 17.5219, "step": 12271 }, { "epoch": 0.22432229879174512, "grad_norm": 5.714440633680433, "learning_rate": 9.042181697719627e-06, "loss": 17.272, "step": 12272 }, { "epoch": 0.22434057798819165, "grad_norm": 8.0434319343778, "learning_rate": 9.042007463091584e-06, "loss": 18.2251, "step": 12273 }, { "epoch": 0.22435885718463816, "grad_norm": 6.3418512583693545, "learning_rate": 9.041833214296656e-06, "loss": 17.4084, "step": 12274 }, { "epoch": 0.2243771363810847, "grad_norm": 6.476545250193185, "learning_rate": 9.041658951335451e-06, "loss": 17.1878, "step": 12275 }, { "epoch": 0.2243954155775312, "grad_norm": 6.491641118428361, "learning_rate": 9.041484674208584e-06, "loss": 17.5175, "step": 12276 }, { "epoch": 0.22441369477397774, "grad_norm": 7.263383471347915, "learning_rate": 9.041310382916663e-06, "loss": 18.2752, "step": 12277 }, { "epoch": 0.22443197397042425, "grad_norm": 5.8331459930085465, "learning_rate": 9.0411360774603e-06, "loss": 17.2002, "step": 12278 }, { "epoch": 0.22445025316687078, "grad_norm": 5.8998655177738515, "learning_rate": 9.040961757840105e-06, "loss": 17.4966, "step": 12279 }, { "epoch": 0.22446853236331732, "grad_norm": 6.678973847779486, "learning_rate": 9.04078742405669e-06, "loss": 17.5399, "step": 12280 }, { "epoch": 0.22448681155976383, "grad_norm": 8.828607081786217, "learning_rate": 9.040613076110667e-06, "loss": 18.1534, "step": 12281 }, { "epoch": 0.22450509075621036, "grad_norm": 7.563487311099231, "learning_rate": 9.040438714002645e-06, "loss": 18.0516, "step": 12282 }, { "epoch": 0.22452336995265687, "grad_norm": 6.668099309745011, "learning_rate": 9.040264337733236e-06, "loss": 17.7034, "step": 12283 }, { "epoch": 0.2245416491491034, "grad_norm": 7.098032500757102, "learning_rate": 9.04008994730305e-06, "loss": 17.812, "step": 12284 }, { "epoch": 0.22455992834554994, "grad_norm": 6.616043707524381, "learning_rate": 9.0399155427127e-06, "loss": 17.7823, "step": 12285 }, { "epoch": 0.22457820754199645, "grad_norm": 7.516322271528726, "learning_rate": 9.039741123962797e-06, "loss": 17.8506, "step": 12286 }, { "epoch": 0.22459648673844299, "grad_norm": 7.694205402970119, "learning_rate": 9.039566691053952e-06, "loss": 18.3337, "step": 12287 }, { "epoch": 0.2246147659348895, "grad_norm": 7.268622837670842, "learning_rate": 9.039392243986775e-06, "loss": 17.6922, "step": 12288 }, { "epoch": 0.22463304513133603, "grad_norm": 6.224198191373773, "learning_rate": 9.03921778276188e-06, "loss": 17.4293, "step": 12289 }, { "epoch": 0.22465132432778256, "grad_norm": 6.813770995982024, "learning_rate": 9.039043307379878e-06, "loss": 17.2797, "step": 12290 }, { "epoch": 0.22466960352422907, "grad_norm": 7.772614195152835, "learning_rate": 9.038868817841378e-06, "loss": 17.9302, "step": 12291 }, { "epoch": 0.2246878827206756, "grad_norm": 6.295833759594762, "learning_rate": 9.038694314146994e-06, "loss": 17.3379, "step": 12292 }, { "epoch": 0.22470616191712212, "grad_norm": 6.433483661840641, "learning_rate": 9.038519796297336e-06, "loss": 17.4104, "step": 12293 }, { "epoch": 0.22472444111356865, "grad_norm": 6.797406734423923, "learning_rate": 9.038345264293019e-06, "loss": 17.5433, "step": 12294 }, { "epoch": 0.22474272031001516, "grad_norm": 8.778103723823701, "learning_rate": 9.038170718134649e-06, "loss": 18.4851, "step": 12295 }, { "epoch": 0.2247609995064617, "grad_norm": 7.7645876826132225, "learning_rate": 9.037996157822843e-06, "loss": 17.9773, "step": 12296 }, { "epoch": 0.22477927870290823, "grad_norm": 7.250220959212541, "learning_rate": 9.03782158335821e-06, "loss": 17.9881, "step": 12297 }, { "epoch": 0.22479755789935474, "grad_norm": 6.823593498163983, "learning_rate": 9.037646994741362e-06, "loss": 17.6734, "step": 12298 }, { "epoch": 0.22481583709580127, "grad_norm": 6.678320920993797, "learning_rate": 9.037472391972915e-06, "loss": 17.838, "step": 12299 }, { "epoch": 0.22483411629224778, "grad_norm": 6.64005663537683, "learning_rate": 9.037297775053476e-06, "loss": 17.5277, "step": 12300 }, { "epoch": 0.22485239548869432, "grad_norm": 6.95275096027094, "learning_rate": 9.037123143983658e-06, "loss": 17.8921, "step": 12301 }, { "epoch": 0.22487067468514085, "grad_norm": 7.228429314890307, "learning_rate": 9.036948498764071e-06, "loss": 17.8771, "step": 12302 }, { "epoch": 0.22488895388158736, "grad_norm": 7.130853561770472, "learning_rate": 9.036773839395335e-06, "loss": 17.9073, "step": 12303 }, { "epoch": 0.2249072330780339, "grad_norm": 7.2958135662705255, "learning_rate": 9.036599165878053e-06, "loss": 17.8241, "step": 12304 }, { "epoch": 0.2249255122744804, "grad_norm": 6.491199829661077, "learning_rate": 9.036424478212843e-06, "loss": 17.3875, "step": 12305 }, { "epoch": 0.22494379147092694, "grad_norm": 7.487583887635919, "learning_rate": 9.036249776400317e-06, "loss": 17.8521, "step": 12306 }, { "epoch": 0.22496207066737348, "grad_norm": 5.91990847320521, "learning_rate": 9.036075060441083e-06, "loss": 17.1465, "step": 12307 }, { "epoch": 0.22498034986381998, "grad_norm": 16.952346676832793, "learning_rate": 9.035900330335757e-06, "loss": 17.8836, "step": 12308 }, { "epoch": 0.22499862906026652, "grad_norm": 6.955858301762353, "learning_rate": 9.035725586084951e-06, "loss": 17.3048, "step": 12309 }, { "epoch": 0.22501690825671303, "grad_norm": 7.941360770280564, "learning_rate": 9.035550827689276e-06, "loss": 18.2653, "step": 12310 }, { "epoch": 0.22503518745315956, "grad_norm": 8.13819670099608, "learning_rate": 9.035376055149347e-06, "loss": 18.0662, "step": 12311 }, { "epoch": 0.22505346664960607, "grad_norm": 7.437032540865966, "learning_rate": 9.035201268465774e-06, "loss": 17.7133, "step": 12312 }, { "epoch": 0.2250717458460526, "grad_norm": 8.855246124244507, "learning_rate": 9.035026467639172e-06, "loss": 18.5506, "step": 12313 }, { "epoch": 0.22509002504249914, "grad_norm": 7.074037704114128, "learning_rate": 9.034851652670151e-06, "loss": 17.9627, "step": 12314 }, { "epoch": 0.22510830423894565, "grad_norm": 7.773871078629837, "learning_rate": 9.034676823559326e-06, "loss": 17.9938, "step": 12315 }, { "epoch": 0.22512658343539219, "grad_norm": 7.41166943339688, "learning_rate": 9.034501980307309e-06, "loss": 17.877, "step": 12316 }, { "epoch": 0.2251448626318387, "grad_norm": 7.703758649739981, "learning_rate": 9.034327122914711e-06, "loss": 17.9387, "step": 12317 }, { "epoch": 0.22516314182828523, "grad_norm": 6.287787631773602, "learning_rate": 9.034152251382148e-06, "loss": 17.5248, "step": 12318 }, { "epoch": 0.22518142102473176, "grad_norm": 5.991404866001102, "learning_rate": 9.033977365710231e-06, "loss": 17.175, "step": 12319 }, { "epoch": 0.22519970022117827, "grad_norm": 6.737952129673305, "learning_rate": 9.033802465899573e-06, "loss": 17.4521, "step": 12320 }, { "epoch": 0.2252179794176248, "grad_norm": 6.635142538134955, "learning_rate": 9.033627551950788e-06, "loss": 17.5758, "step": 12321 }, { "epoch": 0.22523625861407132, "grad_norm": 6.187063928254991, "learning_rate": 9.03345262386449e-06, "loss": 17.3834, "step": 12322 }, { "epoch": 0.22525453781051785, "grad_norm": 6.854516115810566, "learning_rate": 9.033277681641288e-06, "loss": 17.5332, "step": 12323 }, { "epoch": 0.2252728170069644, "grad_norm": 7.3160421278758445, "learning_rate": 9.033102725281799e-06, "loss": 18.0384, "step": 12324 }, { "epoch": 0.2252910962034109, "grad_norm": 5.726426405407629, "learning_rate": 9.032927754786633e-06, "loss": 17.1843, "step": 12325 }, { "epoch": 0.22530937539985743, "grad_norm": 6.094053491340705, "learning_rate": 9.032752770156408e-06, "loss": 17.4087, "step": 12326 }, { "epoch": 0.22532765459630394, "grad_norm": 7.677299805986886, "learning_rate": 9.032577771391732e-06, "loss": 18.114, "step": 12327 }, { "epoch": 0.22534593379275047, "grad_norm": 6.226787280353511, "learning_rate": 9.032402758493222e-06, "loss": 17.5154, "step": 12328 }, { "epoch": 0.22536421298919698, "grad_norm": 7.993772075133782, "learning_rate": 9.032227731461492e-06, "loss": 18.0567, "step": 12329 }, { "epoch": 0.22538249218564352, "grad_norm": 6.667197143954941, "learning_rate": 9.03205269029715e-06, "loss": 17.7498, "step": 12330 }, { "epoch": 0.22540077138209005, "grad_norm": 6.9567092826670915, "learning_rate": 9.031877635000817e-06, "loss": 17.7729, "step": 12331 }, { "epoch": 0.22541905057853656, "grad_norm": 6.854502695810457, "learning_rate": 9.0317025655731e-06, "loss": 17.7204, "step": 12332 }, { "epoch": 0.2254373297749831, "grad_norm": 6.71738812866057, "learning_rate": 9.031527482014617e-06, "loss": 17.5618, "step": 12333 }, { "epoch": 0.2254556089714296, "grad_norm": 7.189306010353928, "learning_rate": 9.031352384325977e-06, "loss": 17.6072, "step": 12334 }, { "epoch": 0.22547388816787614, "grad_norm": 7.855535097029646, "learning_rate": 9.0311772725078e-06, "loss": 17.5918, "step": 12335 }, { "epoch": 0.22549216736432268, "grad_norm": 6.600393763396325, "learning_rate": 9.031002146560697e-06, "loss": 17.3361, "step": 12336 }, { "epoch": 0.22551044656076918, "grad_norm": 6.748223412611591, "learning_rate": 9.03082700648528e-06, "loss": 17.6995, "step": 12337 }, { "epoch": 0.22552872575721572, "grad_norm": 5.70658566896244, "learning_rate": 9.030651852282164e-06, "loss": 17.1565, "step": 12338 }, { "epoch": 0.22554700495366223, "grad_norm": 6.576046686350633, "learning_rate": 9.030476683951961e-06, "loss": 17.6812, "step": 12339 }, { "epoch": 0.22556528415010876, "grad_norm": 6.747470316806803, "learning_rate": 9.03030150149529e-06, "loss": 17.6554, "step": 12340 }, { "epoch": 0.2255835633465553, "grad_norm": 6.812748807744925, "learning_rate": 9.03012630491276e-06, "loss": 17.8052, "step": 12341 }, { "epoch": 0.2256018425430018, "grad_norm": 6.138326043650591, "learning_rate": 9.029951094204988e-06, "loss": 17.3363, "step": 12342 }, { "epoch": 0.22562012173944834, "grad_norm": 6.25429209699059, "learning_rate": 9.029775869372589e-06, "loss": 17.4272, "step": 12343 }, { "epoch": 0.22563840093589485, "grad_norm": 8.214937102579777, "learning_rate": 9.029600630416171e-06, "loss": 18.3377, "step": 12344 }, { "epoch": 0.2256566801323414, "grad_norm": 6.270352054936354, "learning_rate": 9.029425377336356e-06, "loss": 17.568, "step": 12345 }, { "epoch": 0.2256749593287879, "grad_norm": 5.901018463275873, "learning_rate": 9.029250110133753e-06, "loss": 17.266, "step": 12346 }, { "epoch": 0.22569323852523443, "grad_norm": 6.701032995897633, "learning_rate": 9.02907482880898e-06, "loss": 18.0243, "step": 12347 }, { "epoch": 0.22571151772168097, "grad_norm": 6.966690579019389, "learning_rate": 9.028899533362645e-06, "loss": 17.8246, "step": 12348 }, { "epoch": 0.22572979691812747, "grad_norm": 7.11109379819678, "learning_rate": 9.02872422379537e-06, "loss": 17.9911, "step": 12349 }, { "epoch": 0.225748076114574, "grad_norm": 7.267833352735575, "learning_rate": 9.028548900107767e-06, "loss": 17.9534, "step": 12350 }, { "epoch": 0.22576635531102052, "grad_norm": 6.169634868324523, "learning_rate": 9.028373562300448e-06, "loss": 17.2552, "step": 12351 }, { "epoch": 0.22578463450746705, "grad_norm": 6.5126285468693474, "learning_rate": 9.02819821037403e-06, "loss": 17.5073, "step": 12352 }, { "epoch": 0.2258029137039136, "grad_norm": 7.21379651562743, "learning_rate": 9.028022844329126e-06, "loss": 17.9977, "step": 12353 }, { "epoch": 0.2258211929003601, "grad_norm": 6.5548063039005315, "learning_rate": 9.027847464166353e-06, "loss": 17.8834, "step": 12354 }, { "epoch": 0.22583947209680663, "grad_norm": 7.08098150183937, "learning_rate": 9.027672069886322e-06, "loss": 17.8187, "step": 12355 }, { "epoch": 0.22585775129325314, "grad_norm": 7.038826231934636, "learning_rate": 9.02749666148965e-06, "loss": 17.9689, "step": 12356 }, { "epoch": 0.22587603048969968, "grad_norm": 7.07682686405181, "learning_rate": 9.027321238976954e-06, "loss": 17.8265, "step": 12357 }, { "epoch": 0.2258943096861462, "grad_norm": 7.530074738438792, "learning_rate": 9.027145802348844e-06, "loss": 17.7535, "step": 12358 }, { "epoch": 0.22591258888259272, "grad_norm": 6.446825344866139, "learning_rate": 9.02697035160594e-06, "loss": 17.585, "step": 12359 }, { "epoch": 0.22593086807903925, "grad_norm": 7.637084923790986, "learning_rate": 9.026794886748853e-06, "loss": 18.0022, "step": 12360 }, { "epoch": 0.22594914727548576, "grad_norm": 6.987077479178613, "learning_rate": 9.0266194077782e-06, "loss": 17.7459, "step": 12361 }, { "epoch": 0.2259674264719323, "grad_norm": 6.824769412214328, "learning_rate": 9.026443914694594e-06, "loss": 17.9237, "step": 12362 }, { "epoch": 0.2259857056683788, "grad_norm": 6.348585215954245, "learning_rate": 9.026268407498651e-06, "loss": 17.421, "step": 12363 }, { "epoch": 0.22600398486482534, "grad_norm": 6.102880320893344, "learning_rate": 9.026092886190989e-06, "loss": 17.4555, "step": 12364 }, { "epoch": 0.22602226406127188, "grad_norm": 6.55237701023886, "learning_rate": 9.02591735077222e-06, "loss": 17.4844, "step": 12365 }, { "epoch": 0.22604054325771838, "grad_norm": 6.144109160099827, "learning_rate": 9.025741801242959e-06, "loss": 17.6064, "step": 12366 }, { "epoch": 0.22605882245416492, "grad_norm": 7.027676643650388, "learning_rate": 9.025566237603822e-06, "loss": 17.5832, "step": 12367 }, { "epoch": 0.22607710165061143, "grad_norm": 6.0564480838712775, "learning_rate": 9.025390659855426e-06, "loss": 17.2484, "step": 12368 }, { "epoch": 0.22609538084705796, "grad_norm": 6.531320813353044, "learning_rate": 9.025215067998386e-06, "loss": 17.6086, "step": 12369 }, { "epoch": 0.2261136600435045, "grad_norm": 6.95048430465292, "learning_rate": 9.025039462033314e-06, "loss": 17.643, "step": 12370 }, { "epoch": 0.226131939239951, "grad_norm": 7.44620805011939, "learning_rate": 9.024863841960829e-06, "loss": 17.6497, "step": 12371 }, { "epoch": 0.22615021843639754, "grad_norm": 8.917362870062181, "learning_rate": 9.024688207781547e-06, "loss": 18.8994, "step": 12372 }, { "epoch": 0.22616849763284405, "grad_norm": 7.280697209067718, "learning_rate": 9.02451255949608e-06, "loss": 17.5772, "step": 12373 }, { "epoch": 0.2261867768292906, "grad_norm": 6.702853337006878, "learning_rate": 9.024336897105045e-06, "loss": 17.9045, "step": 12374 }, { "epoch": 0.22620505602573712, "grad_norm": 6.547542319992636, "learning_rate": 9.024161220609061e-06, "loss": 17.392, "step": 12375 }, { "epoch": 0.22622333522218363, "grad_norm": 6.929016110654037, "learning_rate": 9.023985530008742e-06, "loss": 17.8594, "step": 12376 }, { "epoch": 0.22624161441863017, "grad_norm": 5.367073952459153, "learning_rate": 9.023809825304698e-06, "loss": 17.1462, "step": 12377 }, { "epoch": 0.22625989361507667, "grad_norm": 7.988439728945888, "learning_rate": 9.023634106497555e-06, "loss": 17.9267, "step": 12378 }, { "epoch": 0.2262781728115232, "grad_norm": 7.494605964344144, "learning_rate": 9.02345837358792e-06, "loss": 18.0292, "step": 12379 }, { "epoch": 0.22629645200796972, "grad_norm": 5.4240483208814, "learning_rate": 9.023282626576413e-06, "loss": 17.0903, "step": 12380 }, { "epoch": 0.22631473120441625, "grad_norm": 7.704682973963291, "learning_rate": 9.02310686546365e-06, "loss": 17.8397, "step": 12381 }, { "epoch": 0.2263330104008628, "grad_norm": 6.395365992338035, "learning_rate": 9.022931090250247e-06, "loss": 17.623, "step": 12382 }, { "epoch": 0.2263512895973093, "grad_norm": 6.859183186820666, "learning_rate": 9.022755300936821e-06, "loss": 17.7596, "step": 12383 }, { "epoch": 0.22636956879375583, "grad_norm": 5.765596472141772, "learning_rate": 9.022579497523985e-06, "loss": 17.1133, "step": 12384 }, { "epoch": 0.22638784799020234, "grad_norm": 7.140007933470052, "learning_rate": 9.022403680012357e-06, "loss": 17.7501, "step": 12385 }, { "epoch": 0.22640612718664888, "grad_norm": 6.718543300536023, "learning_rate": 9.022227848402552e-06, "loss": 17.6706, "step": 12386 }, { "epoch": 0.2264244063830954, "grad_norm": 5.924532017217724, "learning_rate": 9.02205200269519e-06, "loss": 17.33, "step": 12387 }, { "epoch": 0.22644268557954192, "grad_norm": 8.222872391505957, "learning_rate": 9.021876142890882e-06, "loss": 18.6182, "step": 12388 }, { "epoch": 0.22646096477598845, "grad_norm": 6.734366446496459, "learning_rate": 9.02170026899025e-06, "loss": 17.9106, "step": 12389 }, { "epoch": 0.22647924397243496, "grad_norm": 6.8857777869063055, "learning_rate": 9.021524380993906e-06, "loss": 17.7422, "step": 12390 }, { "epoch": 0.2264975231688815, "grad_norm": 7.125279116793899, "learning_rate": 9.021348478902468e-06, "loss": 17.674, "step": 12391 }, { "epoch": 0.22651580236532803, "grad_norm": 7.436442051858977, "learning_rate": 9.021172562716551e-06, "loss": 17.9451, "step": 12392 }, { "epoch": 0.22653408156177454, "grad_norm": 6.187236880627777, "learning_rate": 9.020996632436775e-06, "loss": 17.5915, "step": 12393 }, { "epoch": 0.22655236075822108, "grad_norm": 5.903576888005673, "learning_rate": 9.020820688063755e-06, "loss": 17.2732, "step": 12394 }, { "epoch": 0.22657063995466759, "grad_norm": 6.413378047315744, "learning_rate": 9.020644729598107e-06, "loss": 17.5233, "step": 12395 }, { "epoch": 0.22658891915111412, "grad_norm": 6.3484233436312, "learning_rate": 9.020468757040449e-06, "loss": 17.7247, "step": 12396 }, { "epoch": 0.22660719834756063, "grad_norm": 5.911804654071197, "learning_rate": 9.020292770391394e-06, "loss": 17.2241, "step": 12397 }, { "epoch": 0.22662547754400716, "grad_norm": 6.724395222585753, "learning_rate": 9.020116769651565e-06, "loss": 17.6779, "step": 12398 }, { "epoch": 0.2266437567404537, "grad_norm": 7.966132708749416, "learning_rate": 9.019940754821574e-06, "loss": 17.897, "step": 12399 }, { "epoch": 0.2266620359369002, "grad_norm": 8.099072340263199, "learning_rate": 9.01976472590204e-06, "loss": 18.1186, "step": 12400 }, { "epoch": 0.22668031513334674, "grad_norm": 5.802227930996529, "learning_rate": 9.01958868289358e-06, "loss": 17.2699, "step": 12401 }, { "epoch": 0.22669859432979325, "grad_norm": 6.146892512705219, "learning_rate": 9.019412625796808e-06, "loss": 17.5141, "step": 12402 }, { "epoch": 0.2267168735262398, "grad_norm": 6.308607104957777, "learning_rate": 9.019236554612346e-06, "loss": 17.3927, "step": 12403 }, { "epoch": 0.22673515272268632, "grad_norm": 8.704926628000624, "learning_rate": 9.019060469340807e-06, "loss": 18.2199, "step": 12404 }, { "epoch": 0.22675343191913283, "grad_norm": 6.7601731358840516, "learning_rate": 9.01888436998281e-06, "loss": 17.7247, "step": 12405 }, { "epoch": 0.22677171111557937, "grad_norm": 7.0988439140350135, "learning_rate": 9.018708256538972e-06, "loss": 17.521, "step": 12406 }, { "epoch": 0.22678999031202587, "grad_norm": 6.072413155833133, "learning_rate": 9.018532129009912e-06, "loss": 17.3915, "step": 12407 }, { "epoch": 0.2268082695084724, "grad_norm": 7.826093537676457, "learning_rate": 9.018355987396244e-06, "loss": 17.8245, "step": 12408 }, { "epoch": 0.22682654870491895, "grad_norm": 6.606313446617124, "learning_rate": 9.018179831698588e-06, "loss": 17.714, "step": 12409 }, { "epoch": 0.22684482790136545, "grad_norm": 8.096086364572532, "learning_rate": 9.01800366191756e-06, "loss": 17.9681, "step": 12410 }, { "epoch": 0.226863107097812, "grad_norm": 6.754049321361079, "learning_rate": 9.017827478053778e-06, "loss": 17.6225, "step": 12411 }, { "epoch": 0.2268813862942585, "grad_norm": 5.726172916773858, "learning_rate": 9.017651280107859e-06, "loss": 17.231, "step": 12412 }, { "epoch": 0.22689966549070503, "grad_norm": 7.080996932121469, "learning_rate": 9.01747506808042e-06, "loss": 17.7147, "step": 12413 }, { "epoch": 0.22691794468715154, "grad_norm": 7.132248945901099, "learning_rate": 9.017298841972082e-06, "loss": 17.9743, "step": 12414 }, { "epoch": 0.22693622388359808, "grad_norm": 5.890459474635145, "learning_rate": 9.017122601783457e-06, "loss": 17.2655, "step": 12415 }, { "epoch": 0.2269545030800446, "grad_norm": 7.9925754588042395, "learning_rate": 9.016946347515168e-06, "loss": 17.7998, "step": 12416 }, { "epoch": 0.22697278227649112, "grad_norm": 6.339927451707241, "learning_rate": 9.016770079167829e-06, "loss": 17.4274, "step": 12417 }, { "epoch": 0.22699106147293766, "grad_norm": 6.3839981397111, "learning_rate": 9.016593796742062e-06, "loss": 17.5346, "step": 12418 }, { "epoch": 0.22700934066938416, "grad_norm": 6.714476029999333, "learning_rate": 9.01641750023848e-06, "loss": 17.6582, "step": 12419 }, { "epoch": 0.2270276198658307, "grad_norm": 8.259896179033946, "learning_rate": 9.016241189657705e-06, "loss": 18.2777, "step": 12420 }, { "epoch": 0.22704589906227723, "grad_norm": 6.835530238088462, "learning_rate": 9.01606486500035e-06, "loss": 17.698, "step": 12421 }, { "epoch": 0.22706417825872374, "grad_norm": 5.3640041829032, "learning_rate": 9.015888526267039e-06, "loss": 17.0343, "step": 12422 }, { "epoch": 0.22708245745517028, "grad_norm": 6.968767675137546, "learning_rate": 9.015712173458387e-06, "loss": 17.7252, "step": 12423 }, { "epoch": 0.22710073665161679, "grad_norm": 7.748829080671614, "learning_rate": 9.01553580657501e-06, "loss": 17.9642, "step": 12424 }, { "epoch": 0.22711901584806332, "grad_norm": 6.653206515480309, "learning_rate": 9.015359425617532e-06, "loss": 17.6293, "step": 12425 }, { "epoch": 0.22713729504450986, "grad_norm": 6.874471388442792, "learning_rate": 9.015183030586565e-06, "loss": 17.8499, "step": 12426 }, { "epoch": 0.22715557424095636, "grad_norm": 6.867013574778568, "learning_rate": 9.015006621482731e-06, "loss": 17.6569, "step": 12427 }, { "epoch": 0.2271738534374029, "grad_norm": 7.165359716422239, "learning_rate": 9.014830198306648e-06, "loss": 18.0228, "step": 12428 }, { "epoch": 0.2271921326338494, "grad_norm": 7.550212146351529, "learning_rate": 9.014653761058932e-06, "loss": 17.9426, "step": 12429 }, { "epoch": 0.22721041183029594, "grad_norm": 5.286133948250526, "learning_rate": 9.014477309740203e-06, "loss": 17.0602, "step": 12430 }, { "epoch": 0.22722869102674245, "grad_norm": 7.310267374318483, "learning_rate": 9.014300844351081e-06, "loss": 17.6793, "step": 12431 }, { "epoch": 0.227246970223189, "grad_norm": 6.601547178421724, "learning_rate": 9.014124364892181e-06, "loss": 17.8393, "step": 12432 }, { "epoch": 0.22726524941963552, "grad_norm": 5.8132377945373905, "learning_rate": 9.013947871364123e-06, "loss": 17.1707, "step": 12433 }, { "epoch": 0.22728352861608203, "grad_norm": 6.378790816808448, "learning_rate": 9.013771363767527e-06, "loss": 17.482, "step": 12434 }, { "epoch": 0.22730180781252857, "grad_norm": 7.174213886845487, "learning_rate": 9.013594842103012e-06, "loss": 17.3914, "step": 12435 }, { "epoch": 0.22732008700897507, "grad_norm": 7.168202336406966, "learning_rate": 9.013418306371194e-06, "loss": 17.7358, "step": 12436 }, { "epoch": 0.2273383662054216, "grad_norm": 6.661420616944232, "learning_rate": 9.013241756572692e-06, "loss": 17.6339, "step": 12437 }, { "epoch": 0.22735664540186815, "grad_norm": 7.098420385874402, "learning_rate": 9.013065192708128e-06, "loss": 17.7022, "step": 12438 }, { "epoch": 0.22737492459831465, "grad_norm": 5.441501533007861, "learning_rate": 9.01288861477812e-06, "loss": 16.9966, "step": 12439 }, { "epoch": 0.2273932037947612, "grad_norm": 7.681352001784799, "learning_rate": 9.012712022783283e-06, "loss": 17.913, "step": 12440 }, { "epoch": 0.2274114829912077, "grad_norm": 8.949349796420888, "learning_rate": 9.012535416724238e-06, "loss": 18.6086, "step": 12441 }, { "epoch": 0.22742976218765423, "grad_norm": 7.739705134628737, "learning_rate": 9.012358796601605e-06, "loss": 18.1888, "step": 12442 }, { "epoch": 0.22744804138410077, "grad_norm": 7.583475393017301, "learning_rate": 9.012182162416003e-06, "loss": 17.5618, "step": 12443 }, { "epoch": 0.22746632058054728, "grad_norm": 7.548928680040296, "learning_rate": 9.012005514168052e-06, "loss": 17.7478, "step": 12444 }, { "epoch": 0.2274845997769938, "grad_norm": 7.358591726462837, "learning_rate": 9.01182885185837e-06, "loss": 17.736, "step": 12445 }, { "epoch": 0.22750287897344032, "grad_norm": 5.2893363235863395, "learning_rate": 9.011652175487574e-06, "loss": 16.9848, "step": 12446 }, { "epoch": 0.22752115816988686, "grad_norm": 6.65548419118315, "learning_rate": 9.011475485056285e-06, "loss": 17.7805, "step": 12447 }, { "epoch": 0.22753943736633336, "grad_norm": 7.441514325461889, "learning_rate": 9.011298780565124e-06, "loss": 17.8744, "step": 12448 }, { "epoch": 0.2275577165627799, "grad_norm": 6.741444606272991, "learning_rate": 9.011122062014709e-06, "loss": 17.8172, "step": 12449 }, { "epoch": 0.22757599575922643, "grad_norm": 8.334131899863701, "learning_rate": 9.010945329405658e-06, "loss": 18.2828, "step": 12450 }, { "epoch": 0.22759427495567294, "grad_norm": 7.322963341330085, "learning_rate": 9.010768582738592e-06, "loss": 17.596, "step": 12451 }, { "epoch": 0.22761255415211948, "grad_norm": 6.027285555663227, "learning_rate": 9.01059182201413e-06, "loss": 17.4733, "step": 12452 }, { "epoch": 0.227630833348566, "grad_norm": 5.202836623511983, "learning_rate": 9.010415047232894e-06, "loss": 17.0539, "step": 12453 }, { "epoch": 0.22764911254501252, "grad_norm": 6.665075150072932, "learning_rate": 9.010238258395498e-06, "loss": 17.7745, "step": 12454 }, { "epoch": 0.22766739174145906, "grad_norm": 8.423251777382154, "learning_rate": 9.010061455502567e-06, "loss": 18.1023, "step": 12455 }, { "epoch": 0.22768567093790557, "grad_norm": 6.547059997079752, "learning_rate": 9.009884638554718e-06, "loss": 17.5038, "step": 12456 }, { "epoch": 0.2277039501343521, "grad_norm": 5.767718578287258, "learning_rate": 9.00970780755257e-06, "loss": 17.2456, "step": 12457 }, { "epoch": 0.2277222293307986, "grad_norm": 7.283933715684057, "learning_rate": 9.009530962496746e-06, "loss": 18.127, "step": 12458 }, { "epoch": 0.22774050852724514, "grad_norm": 5.845148615638448, "learning_rate": 9.009354103387864e-06, "loss": 17.2741, "step": 12459 }, { "epoch": 0.22775878772369168, "grad_norm": 7.51111630909009, "learning_rate": 9.009177230226542e-06, "loss": 18.3568, "step": 12460 }, { "epoch": 0.2277770669201382, "grad_norm": 7.1047743057025095, "learning_rate": 9.009000343013403e-06, "loss": 17.9295, "step": 12461 }, { "epoch": 0.22779534611658472, "grad_norm": 7.938009218971499, "learning_rate": 9.008823441749067e-06, "loss": 17.8752, "step": 12462 }, { "epoch": 0.22781362531303123, "grad_norm": 7.037058314775159, "learning_rate": 9.008646526434151e-06, "loss": 17.6727, "step": 12463 }, { "epoch": 0.22783190450947777, "grad_norm": 6.121077682756427, "learning_rate": 9.008469597069276e-06, "loss": 17.6428, "step": 12464 }, { "epoch": 0.22785018370592428, "grad_norm": 6.593101398863978, "learning_rate": 9.008292653655064e-06, "loss": 17.5556, "step": 12465 }, { "epoch": 0.2278684629023708, "grad_norm": 7.31502964129193, "learning_rate": 9.008115696192133e-06, "loss": 18.0551, "step": 12466 }, { "epoch": 0.22788674209881735, "grad_norm": 6.146276626518445, "learning_rate": 9.007938724681106e-06, "loss": 17.4062, "step": 12467 }, { "epoch": 0.22790502129526385, "grad_norm": 5.469743171696297, "learning_rate": 9.0077617391226e-06, "loss": 17.1875, "step": 12468 }, { "epoch": 0.2279233004917104, "grad_norm": 7.813582794895173, "learning_rate": 9.007584739517237e-06, "loss": 18.0963, "step": 12469 }, { "epoch": 0.2279415796881569, "grad_norm": 7.550237307960634, "learning_rate": 9.007407725865638e-06, "loss": 17.8713, "step": 12470 }, { "epoch": 0.22795985888460343, "grad_norm": 6.20967047309663, "learning_rate": 9.007230698168422e-06, "loss": 17.3119, "step": 12471 }, { "epoch": 0.22797813808104997, "grad_norm": 6.346006043995053, "learning_rate": 9.007053656426213e-06, "loss": 17.5279, "step": 12472 }, { "epoch": 0.22799641727749648, "grad_norm": 6.260159084648033, "learning_rate": 9.006876600639624e-06, "loss": 17.5299, "step": 12473 }, { "epoch": 0.228014696473943, "grad_norm": 6.323058036066158, "learning_rate": 9.006699530809284e-06, "loss": 17.4276, "step": 12474 }, { "epoch": 0.22803297567038952, "grad_norm": 6.19932219235715, "learning_rate": 9.006522446935807e-06, "loss": 17.4824, "step": 12475 }, { "epoch": 0.22805125486683606, "grad_norm": 7.865928942197825, "learning_rate": 9.006345349019818e-06, "loss": 18.0053, "step": 12476 }, { "epoch": 0.2280695340632826, "grad_norm": 6.363254127694012, "learning_rate": 9.006168237061936e-06, "loss": 17.3546, "step": 12477 }, { "epoch": 0.2280878132597291, "grad_norm": 5.913155834157607, "learning_rate": 9.005991111062782e-06, "loss": 17.4868, "step": 12478 }, { "epoch": 0.22810609245617564, "grad_norm": 7.065934559973342, "learning_rate": 9.005813971022977e-06, "loss": 17.7804, "step": 12479 }, { "epoch": 0.22812437165262214, "grad_norm": 6.476520955100705, "learning_rate": 9.005636816943141e-06, "loss": 17.4125, "step": 12480 }, { "epoch": 0.22814265084906868, "grad_norm": 6.216250226614293, "learning_rate": 9.005459648823897e-06, "loss": 17.3022, "step": 12481 }, { "epoch": 0.2281609300455152, "grad_norm": 6.116706968470408, "learning_rate": 9.005282466665864e-06, "loss": 17.5586, "step": 12482 }, { "epoch": 0.22817920924196172, "grad_norm": 6.999228409098107, "learning_rate": 9.005105270469663e-06, "loss": 17.8334, "step": 12483 }, { "epoch": 0.22819748843840826, "grad_norm": 7.125612598589452, "learning_rate": 9.004928060235915e-06, "loss": 17.7083, "step": 12484 }, { "epoch": 0.22821576763485477, "grad_norm": 7.8696084480006085, "learning_rate": 9.004750835965241e-06, "loss": 18.3996, "step": 12485 }, { "epoch": 0.2282340468313013, "grad_norm": 6.2676428832425, "learning_rate": 9.004573597658265e-06, "loss": 17.313, "step": 12486 }, { "epoch": 0.2282523260277478, "grad_norm": 7.765047865395796, "learning_rate": 9.004396345315604e-06, "loss": 17.9367, "step": 12487 }, { "epoch": 0.22827060522419435, "grad_norm": 6.9273134841788035, "learning_rate": 9.004219078937883e-06, "loss": 17.5964, "step": 12488 }, { "epoch": 0.22828888442064088, "grad_norm": 6.450689977922569, "learning_rate": 9.004041798525723e-06, "loss": 17.374, "step": 12489 }, { "epoch": 0.2283071636170874, "grad_norm": 5.876191355914733, "learning_rate": 9.00386450407974e-06, "loss": 17.3217, "step": 12490 }, { "epoch": 0.22832544281353392, "grad_norm": 6.824802720193081, "learning_rate": 9.003687195600561e-06, "loss": 17.6096, "step": 12491 }, { "epoch": 0.22834372200998043, "grad_norm": 6.326670893919381, "learning_rate": 9.003509873088806e-06, "loss": 17.4869, "step": 12492 }, { "epoch": 0.22836200120642697, "grad_norm": 6.080616837358299, "learning_rate": 9.003332536545097e-06, "loss": 17.6232, "step": 12493 }, { "epoch": 0.2283802804028735, "grad_norm": 6.481898583726981, "learning_rate": 9.003155185970055e-06, "loss": 17.6116, "step": 12494 }, { "epoch": 0.22839855959932, "grad_norm": 7.2780683463881495, "learning_rate": 9.0029778213643e-06, "loss": 17.7783, "step": 12495 }, { "epoch": 0.22841683879576655, "grad_norm": 7.187827518993483, "learning_rate": 9.002800442728456e-06, "loss": 17.9959, "step": 12496 }, { "epoch": 0.22843511799221305, "grad_norm": 6.31378587333316, "learning_rate": 9.002623050063144e-06, "loss": 17.3304, "step": 12497 }, { "epoch": 0.2284533971886596, "grad_norm": 6.371838918299203, "learning_rate": 9.002445643368985e-06, "loss": 17.5231, "step": 12498 }, { "epoch": 0.2284716763851061, "grad_norm": 6.670629055001435, "learning_rate": 9.002268222646602e-06, "loss": 17.683, "step": 12499 }, { "epoch": 0.22848995558155263, "grad_norm": 7.111535221045845, "learning_rate": 9.002090787896616e-06, "loss": 17.6803, "step": 12500 }, { "epoch": 0.22850823477799917, "grad_norm": 6.907882759243566, "learning_rate": 9.001913339119647e-06, "loss": 17.8974, "step": 12501 }, { "epoch": 0.22852651397444568, "grad_norm": 6.7099161801259966, "learning_rate": 9.001735876316323e-06, "loss": 17.4129, "step": 12502 }, { "epoch": 0.2285447931708922, "grad_norm": 6.24639562168642, "learning_rate": 9.001558399487257e-06, "loss": 17.2618, "step": 12503 }, { "epoch": 0.22856307236733872, "grad_norm": 6.191726897974517, "learning_rate": 9.00138090863308e-06, "loss": 17.3679, "step": 12504 }, { "epoch": 0.22858135156378526, "grad_norm": 6.494394177223834, "learning_rate": 9.00120340375441e-06, "loss": 17.8027, "step": 12505 }, { "epoch": 0.2285996307602318, "grad_norm": 6.973080184641169, "learning_rate": 9.001025884851868e-06, "loss": 17.6395, "step": 12506 }, { "epoch": 0.2286179099566783, "grad_norm": 6.050184229241976, "learning_rate": 9.000848351926077e-06, "loss": 17.1681, "step": 12507 }, { "epoch": 0.22863618915312484, "grad_norm": 6.583322560980222, "learning_rate": 9.000670804977661e-06, "loss": 17.5065, "step": 12508 }, { "epoch": 0.22865446834957134, "grad_norm": 8.761571713223706, "learning_rate": 9.00049324400724e-06, "loss": 18.2826, "step": 12509 }, { "epoch": 0.22867274754601788, "grad_norm": 10.445797901169518, "learning_rate": 9.000315669015438e-06, "loss": 18.0063, "step": 12510 }, { "epoch": 0.22869102674246441, "grad_norm": 7.187926896437648, "learning_rate": 9.000138080002876e-06, "loss": 17.5907, "step": 12511 }, { "epoch": 0.22870930593891092, "grad_norm": 8.741326794947055, "learning_rate": 8.999960476970178e-06, "loss": 18.0142, "step": 12512 }, { "epoch": 0.22872758513535746, "grad_norm": 5.776068892469077, "learning_rate": 8.999782859917966e-06, "loss": 17.1766, "step": 12513 }, { "epoch": 0.22874586433180397, "grad_norm": 6.171879540219162, "learning_rate": 8.99960522884686e-06, "loss": 17.3487, "step": 12514 }, { "epoch": 0.2287641435282505, "grad_norm": 6.41565242675409, "learning_rate": 8.999427583757487e-06, "loss": 17.6119, "step": 12515 }, { "epoch": 0.228782422724697, "grad_norm": 6.044914902041561, "learning_rate": 8.999249924650467e-06, "loss": 17.156, "step": 12516 }, { "epoch": 0.22880070192114355, "grad_norm": 6.738196133000485, "learning_rate": 8.999072251526422e-06, "loss": 17.8699, "step": 12517 }, { "epoch": 0.22881898111759008, "grad_norm": 6.419311646700771, "learning_rate": 8.998894564385976e-06, "loss": 17.4026, "step": 12518 }, { "epoch": 0.2288372603140366, "grad_norm": 6.392433973015512, "learning_rate": 8.998716863229753e-06, "loss": 17.6232, "step": 12519 }, { "epoch": 0.22885553951048312, "grad_norm": 5.8958085404852865, "learning_rate": 8.998539148058371e-06, "loss": 17.3042, "step": 12520 }, { "epoch": 0.22887381870692963, "grad_norm": 6.74062692196872, "learning_rate": 8.99836141887246e-06, "loss": 17.7199, "step": 12521 }, { "epoch": 0.22889209790337617, "grad_norm": 6.636453772326242, "learning_rate": 8.998183675672639e-06, "loss": 17.727, "step": 12522 }, { "epoch": 0.2289103770998227, "grad_norm": 6.151624342410829, "learning_rate": 8.998005918459529e-06, "loss": 17.3302, "step": 12523 }, { "epoch": 0.2289286562962692, "grad_norm": 7.524741787304753, "learning_rate": 8.997828147233756e-06, "loss": 18.1206, "step": 12524 }, { "epoch": 0.22894693549271575, "grad_norm": 7.0537688901822415, "learning_rate": 8.997650361995942e-06, "loss": 17.8464, "step": 12525 }, { "epoch": 0.22896521468916226, "grad_norm": 6.47024756749618, "learning_rate": 8.997472562746711e-06, "loss": 17.3939, "step": 12526 }, { "epoch": 0.2289834938856088, "grad_norm": 7.684796251635377, "learning_rate": 8.997294749486685e-06, "loss": 18.0094, "step": 12527 }, { "epoch": 0.22900177308205533, "grad_norm": 7.475698948068777, "learning_rate": 8.997116922216487e-06, "loss": 17.8596, "step": 12528 }, { "epoch": 0.22902005227850183, "grad_norm": 7.102665538397156, "learning_rate": 8.996939080936743e-06, "loss": 17.4937, "step": 12529 }, { "epoch": 0.22903833147494837, "grad_norm": 6.416082285582877, "learning_rate": 8.996761225648074e-06, "loss": 17.3821, "step": 12530 }, { "epoch": 0.22905661067139488, "grad_norm": 6.281991184229925, "learning_rate": 8.996583356351103e-06, "loss": 17.4101, "step": 12531 }, { "epoch": 0.2290748898678414, "grad_norm": 5.685979659910896, "learning_rate": 8.996405473046455e-06, "loss": 17.2703, "step": 12532 }, { "epoch": 0.22909316906428792, "grad_norm": 7.809788721353094, "learning_rate": 8.996227575734751e-06, "loss": 17.7103, "step": 12533 }, { "epoch": 0.22911144826073446, "grad_norm": 5.807257328034285, "learning_rate": 8.996049664416617e-06, "loss": 17.0531, "step": 12534 }, { "epoch": 0.229129727457181, "grad_norm": 6.141165254363331, "learning_rate": 8.995871739092676e-06, "loss": 17.3797, "step": 12535 }, { "epoch": 0.2291480066536275, "grad_norm": 6.562547748460574, "learning_rate": 8.99569379976355e-06, "loss": 17.626, "step": 12536 }, { "epoch": 0.22916628585007404, "grad_norm": 6.177743602828855, "learning_rate": 8.995515846429865e-06, "loss": 17.4222, "step": 12537 }, { "epoch": 0.22918456504652054, "grad_norm": 6.947711500882194, "learning_rate": 8.995337879092244e-06, "loss": 17.5877, "step": 12538 }, { "epoch": 0.22920284424296708, "grad_norm": 6.481432353025655, "learning_rate": 8.995159897751311e-06, "loss": 17.5561, "step": 12539 }, { "epoch": 0.22922112343941362, "grad_norm": 6.853642406435706, "learning_rate": 8.994981902407688e-06, "loss": 17.5894, "step": 12540 }, { "epoch": 0.22923940263586012, "grad_norm": 6.96098333316694, "learning_rate": 8.994803893062e-06, "loss": 17.7678, "step": 12541 }, { "epoch": 0.22925768183230666, "grad_norm": 7.911748352299507, "learning_rate": 8.994625869714872e-06, "loss": 17.9697, "step": 12542 }, { "epoch": 0.22927596102875317, "grad_norm": 8.921371968463037, "learning_rate": 8.994447832366926e-06, "loss": 18.1782, "step": 12543 }, { "epoch": 0.2292942402251997, "grad_norm": 7.279355339017593, "learning_rate": 8.994269781018787e-06, "loss": 17.3983, "step": 12544 }, { "epoch": 0.22931251942164624, "grad_norm": 7.475994898745621, "learning_rate": 8.994091715671081e-06, "loss": 18.0412, "step": 12545 }, { "epoch": 0.22933079861809275, "grad_norm": 8.479527737811965, "learning_rate": 8.993913636324427e-06, "loss": 18.1023, "step": 12546 }, { "epoch": 0.22934907781453928, "grad_norm": 5.878535855809932, "learning_rate": 8.993735542979453e-06, "loss": 17.0986, "step": 12547 }, { "epoch": 0.2293673570109858, "grad_norm": 5.2999987002435, "learning_rate": 8.993557435636784e-06, "loss": 16.8318, "step": 12548 }, { "epoch": 0.22938563620743233, "grad_norm": 8.142359630907281, "learning_rate": 8.993379314297042e-06, "loss": 18.0559, "step": 12549 }, { "epoch": 0.22940391540387883, "grad_norm": 5.272730622394096, "learning_rate": 8.993201178960853e-06, "loss": 17.0443, "step": 12550 }, { "epoch": 0.22942219460032537, "grad_norm": 5.915003041650118, "learning_rate": 8.99302302962884e-06, "loss": 17.2106, "step": 12551 }, { "epoch": 0.2294404737967719, "grad_norm": 5.603933753552654, "learning_rate": 8.992844866301627e-06, "loss": 17.2869, "step": 12552 }, { "epoch": 0.2294587529932184, "grad_norm": 6.60340195412723, "learning_rate": 8.992666688979838e-06, "loss": 17.4153, "step": 12553 }, { "epoch": 0.22947703218966495, "grad_norm": 7.213417414091133, "learning_rate": 8.992488497664101e-06, "loss": 17.777, "step": 12554 }, { "epoch": 0.22949531138611146, "grad_norm": 7.264297281644561, "learning_rate": 8.992310292355037e-06, "loss": 17.8815, "step": 12555 }, { "epoch": 0.229513590582558, "grad_norm": 7.399917509915868, "learning_rate": 8.992132073053272e-06, "loss": 18.0332, "step": 12556 }, { "epoch": 0.22953186977900453, "grad_norm": 6.5106449740748715, "learning_rate": 8.991953839759432e-06, "loss": 17.5683, "step": 12557 }, { "epoch": 0.22955014897545103, "grad_norm": 6.162216353065298, "learning_rate": 8.99177559247414e-06, "loss": 17.4597, "step": 12558 }, { "epoch": 0.22956842817189757, "grad_norm": 6.409297331701278, "learning_rate": 8.991597331198018e-06, "loss": 17.5378, "step": 12559 }, { "epoch": 0.22958670736834408, "grad_norm": 6.79721675397959, "learning_rate": 8.991419055931697e-06, "loss": 17.4977, "step": 12560 }, { "epoch": 0.22960498656479061, "grad_norm": 5.680684048615769, "learning_rate": 8.991240766675798e-06, "loss": 17.1447, "step": 12561 }, { "epoch": 0.22962326576123715, "grad_norm": 7.109729819431134, "learning_rate": 8.991062463430943e-06, "loss": 17.5235, "step": 12562 }, { "epoch": 0.22964154495768366, "grad_norm": 6.663936086487505, "learning_rate": 8.990884146197765e-06, "loss": 17.5464, "step": 12563 }, { "epoch": 0.2296598241541302, "grad_norm": 7.0581609461212835, "learning_rate": 8.990705814976883e-06, "loss": 17.9784, "step": 12564 }, { "epoch": 0.2296781033505767, "grad_norm": 6.513759642232706, "learning_rate": 8.990527469768921e-06, "loss": 17.4535, "step": 12565 }, { "epoch": 0.22969638254702324, "grad_norm": 7.505513390026089, "learning_rate": 8.99034911057451e-06, "loss": 17.7397, "step": 12566 }, { "epoch": 0.22971466174346974, "grad_norm": 6.137401951152139, "learning_rate": 8.99017073739427e-06, "loss": 17.4381, "step": 12567 }, { "epoch": 0.22973294093991628, "grad_norm": 6.098398579223316, "learning_rate": 8.989992350228827e-06, "loss": 17.3569, "step": 12568 }, { "epoch": 0.22975122013636282, "grad_norm": 5.639631094059768, "learning_rate": 8.989813949078808e-06, "loss": 17.3075, "step": 12569 }, { "epoch": 0.22976949933280932, "grad_norm": 6.526841461697767, "learning_rate": 8.989635533944837e-06, "loss": 17.5349, "step": 12570 }, { "epoch": 0.22978777852925586, "grad_norm": 7.68988900944073, "learning_rate": 8.98945710482754e-06, "loss": 18.0816, "step": 12571 }, { "epoch": 0.22980605772570237, "grad_norm": 5.977019301105379, "learning_rate": 8.989278661727541e-06, "loss": 17.3921, "step": 12572 }, { "epoch": 0.2298243369221489, "grad_norm": 6.478667226965484, "learning_rate": 8.989100204645469e-06, "loss": 17.3424, "step": 12573 }, { "epoch": 0.22984261611859544, "grad_norm": 7.289630488656758, "learning_rate": 8.988921733581944e-06, "loss": 17.7558, "step": 12574 }, { "epoch": 0.22986089531504195, "grad_norm": 7.0332537951974805, "learning_rate": 8.988743248537597e-06, "loss": 17.9819, "step": 12575 }, { "epoch": 0.22987917451148848, "grad_norm": 7.884691853844178, "learning_rate": 8.988564749513048e-06, "loss": 17.991, "step": 12576 }, { "epoch": 0.229897453707935, "grad_norm": 6.5376790791049375, "learning_rate": 8.988386236508928e-06, "loss": 17.553, "step": 12577 }, { "epoch": 0.22991573290438153, "grad_norm": 7.066783121330573, "learning_rate": 8.98820770952586e-06, "loss": 17.7031, "step": 12578 }, { "epoch": 0.22993401210082806, "grad_norm": 7.013659824042925, "learning_rate": 8.988029168564471e-06, "loss": 17.97, "step": 12579 }, { "epoch": 0.22995229129727457, "grad_norm": 6.74694556488098, "learning_rate": 8.987850613625384e-06, "loss": 17.5762, "step": 12580 }, { "epoch": 0.2299705704937211, "grad_norm": 6.3388721790425455, "learning_rate": 8.987672044709228e-06, "loss": 17.5761, "step": 12581 }, { "epoch": 0.2299888496901676, "grad_norm": 7.183547300713726, "learning_rate": 8.987493461816626e-06, "loss": 17.6874, "step": 12582 }, { "epoch": 0.23000712888661415, "grad_norm": 6.9699117910774175, "learning_rate": 8.987314864948207e-06, "loss": 17.8068, "step": 12583 }, { "epoch": 0.23002540808306066, "grad_norm": 5.608276462681504, "learning_rate": 8.987136254104594e-06, "loss": 17.2474, "step": 12584 }, { "epoch": 0.2300436872795072, "grad_norm": 6.677230612371447, "learning_rate": 8.986957629286416e-06, "loss": 17.5887, "step": 12585 }, { "epoch": 0.23006196647595373, "grad_norm": 6.837957475119126, "learning_rate": 8.986778990494296e-06, "loss": 17.5032, "step": 12586 }, { "epoch": 0.23008024567240024, "grad_norm": 7.362353618173081, "learning_rate": 8.986600337728863e-06, "loss": 17.7967, "step": 12587 }, { "epoch": 0.23009852486884677, "grad_norm": 7.822005971236524, "learning_rate": 8.98642167099074e-06, "loss": 17.9193, "step": 12588 }, { "epoch": 0.23011680406529328, "grad_norm": 7.1110178575429694, "learning_rate": 8.986242990280556e-06, "loss": 17.1459, "step": 12589 }, { "epoch": 0.23013508326173981, "grad_norm": 7.040891386515305, "learning_rate": 8.986064295598937e-06, "loss": 17.6653, "step": 12590 }, { "epoch": 0.23015336245818635, "grad_norm": 5.6072354844521906, "learning_rate": 8.985885586946507e-06, "loss": 16.9168, "step": 12591 }, { "epoch": 0.23017164165463286, "grad_norm": 6.016958462413485, "learning_rate": 8.985706864323896e-06, "loss": 17.1112, "step": 12592 }, { "epoch": 0.2301899208510794, "grad_norm": 7.227371297609713, "learning_rate": 8.985528127731727e-06, "loss": 17.474, "step": 12593 }, { "epoch": 0.2302082000475259, "grad_norm": 6.279187554419717, "learning_rate": 8.985349377170626e-06, "loss": 17.4538, "step": 12594 }, { "epoch": 0.23022647924397244, "grad_norm": 6.718872122911122, "learning_rate": 8.985170612641222e-06, "loss": 17.6669, "step": 12595 }, { "epoch": 0.23024475844041897, "grad_norm": 6.508252219883713, "learning_rate": 8.984991834144143e-06, "loss": 17.583, "step": 12596 }, { "epoch": 0.23026303763686548, "grad_norm": 5.965840370153033, "learning_rate": 8.984813041680013e-06, "loss": 17.1702, "step": 12597 }, { "epoch": 0.23028131683331202, "grad_norm": 6.235020004113385, "learning_rate": 8.984634235249457e-06, "loss": 17.2796, "step": 12598 }, { "epoch": 0.23029959602975852, "grad_norm": 7.665610962559877, "learning_rate": 8.984455414853106e-06, "loss": 18.0396, "step": 12599 }, { "epoch": 0.23031787522620506, "grad_norm": 6.81995482604741, "learning_rate": 8.984276580491585e-06, "loss": 17.6362, "step": 12600 }, { "epoch": 0.23033615442265157, "grad_norm": 7.102639361588913, "learning_rate": 8.984097732165518e-06, "loss": 17.8958, "step": 12601 }, { "epoch": 0.2303544336190981, "grad_norm": 5.969602264257139, "learning_rate": 8.983918869875535e-06, "loss": 17.3347, "step": 12602 }, { "epoch": 0.23037271281554464, "grad_norm": 7.001558828077685, "learning_rate": 8.983739993622262e-06, "loss": 17.5424, "step": 12603 }, { "epoch": 0.23039099201199115, "grad_norm": 6.195951514051619, "learning_rate": 8.983561103406326e-06, "loss": 17.281, "step": 12604 }, { "epoch": 0.23040927120843768, "grad_norm": 6.406386080044461, "learning_rate": 8.983382199228355e-06, "loss": 17.5129, "step": 12605 }, { "epoch": 0.2304275504048842, "grad_norm": 6.80794578612935, "learning_rate": 8.983203281088972e-06, "loss": 17.8294, "step": 12606 }, { "epoch": 0.23044582960133073, "grad_norm": 7.146491191474552, "learning_rate": 8.983024348988812e-06, "loss": 17.8156, "step": 12607 }, { "epoch": 0.23046410879777726, "grad_norm": 6.658368575968671, "learning_rate": 8.982845402928492e-06, "loss": 17.831, "step": 12608 }, { "epoch": 0.23048238799422377, "grad_norm": 7.188384913391123, "learning_rate": 8.982666442908647e-06, "loss": 17.928, "step": 12609 }, { "epoch": 0.2305006671906703, "grad_norm": 7.181366336026206, "learning_rate": 8.982487468929903e-06, "loss": 17.5838, "step": 12610 }, { "epoch": 0.2305189463871168, "grad_norm": 6.113867339288262, "learning_rate": 8.982308480992886e-06, "loss": 17.1665, "step": 12611 }, { "epoch": 0.23053722558356335, "grad_norm": 6.6525692485777075, "learning_rate": 8.982129479098221e-06, "loss": 17.2119, "step": 12612 }, { "epoch": 0.23055550478000988, "grad_norm": 6.1259140723075545, "learning_rate": 8.981950463246538e-06, "loss": 17.3334, "step": 12613 }, { "epoch": 0.2305737839764564, "grad_norm": 6.034860566559422, "learning_rate": 8.981771433438467e-06, "loss": 17.1934, "step": 12614 }, { "epoch": 0.23059206317290293, "grad_norm": 6.124692958782826, "learning_rate": 8.98159238967463e-06, "loss": 17.2687, "step": 12615 }, { "epoch": 0.23061034236934944, "grad_norm": 6.116606685379102, "learning_rate": 8.981413331955657e-06, "loss": 17.4827, "step": 12616 }, { "epoch": 0.23062862156579597, "grad_norm": 6.836158558654621, "learning_rate": 8.981234260282177e-06, "loss": 17.7202, "step": 12617 }, { "epoch": 0.23064690076224248, "grad_norm": 6.262228607960215, "learning_rate": 8.981055174654815e-06, "loss": 17.3742, "step": 12618 }, { "epoch": 0.23066517995868902, "grad_norm": 6.4218464165121825, "learning_rate": 8.980876075074202e-06, "loss": 17.4499, "step": 12619 }, { "epoch": 0.23068345915513555, "grad_norm": 6.699852881167036, "learning_rate": 8.980696961540964e-06, "loss": 17.7633, "step": 12620 }, { "epoch": 0.23070173835158206, "grad_norm": 6.044320608620665, "learning_rate": 8.980517834055728e-06, "loss": 17.2964, "step": 12621 }, { "epoch": 0.2307200175480286, "grad_norm": 8.171391327262505, "learning_rate": 8.980338692619122e-06, "loss": 17.8528, "step": 12622 }, { "epoch": 0.2307382967444751, "grad_norm": 6.700840355458898, "learning_rate": 8.980159537231774e-06, "loss": 17.5962, "step": 12623 }, { "epoch": 0.23075657594092164, "grad_norm": 7.140536927571538, "learning_rate": 8.979980367894313e-06, "loss": 17.5118, "step": 12624 }, { "epoch": 0.23077485513736817, "grad_norm": 6.444970035759333, "learning_rate": 8.979801184607364e-06, "loss": 17.3046, "step": 12625 }, { "epoch": 0.23079313433381468, "grad_norm": 5.836426830078904, "learning_rate": 8.97962198737156e-06, "loss": 17.4554, "step": 12626 }, { "epoch": 0.23081141353026122, "grad_norm": 6.501548222905501, "learning_rate": 8.979442776187524e-06, "loss": 17.4258, "step": 12627 }, { "epoch": 0.23082969272670772, "grad_norm": 8.510282767251702, "learning_rate": 8.979263551055887e-06, "loss": 18.5991, "step": 12628 }, { "epoch": 0.23084797192315426, "grad_norm": 6.544339383939356, "learning_rate": 8.979084311977277e-06, "loss": 17.2998, "step": 12629 }, { "epoch": 0.2308662511196008, "grad_norm": 4.810441782297864, "learning_rate": 8.978905058952323e-06, "loss": 16.7762, "step": 12630 }, { "epoch": 0.2308845303160473, "grad_norm": 6.753057630563337, "learning_rate": 8.978725791981651e-06, "loss": 17.7448, "step": 12631 }, { "epoch": 0.23090280951249384, "grad_norm": 6.878295922185571, "learning_rate": 8.978546511065889e-06, "loss": 17.8306, "step": 12632 }, { "epoch": 0.23092108870894035, "grad_norm": 6.601055261394896, "learning_rate": 8.978367216205668e-06, "loss": 17.6226, "step": 12633 }, { "epoch": 0.23093936790538688, "grad_norm": 6.96294244153231, "learning_rate": 8.978187907401615e-06, "loss": 17.5169, "step": 12634 }, { "epoch": 0.2309576471018334, "grad_norm": 7.629841294699073, "learning_rate": 8.97800858465436e-06, "loss": 17.7823, "step": 12635 }, { "epoch": 0.23097592629827993, "grad_norm": 6.490291733731224, "learning_rate": 8.977829247964526e-06, "loss": 17.6466, "step": 12636 }, { "epoch": 0.23099420549472646, "grad_norm": 5.933055992843987, "learning_rate": 8.97764989733275e-06, "loss": 17.1796, "step": 12637 }, { "epoch": 0.23101248469117297, "grad_norm": 6.914293204747319, "learning_rate": 8.977470532759654e-06, "loss": 17.6409, "step": 12638 }, { "epoch": 0.2310307638876195, "grad_norm": 7.44226165891364, "learning_rate": 8.97729115424587e-06, "loss": 18.0814, "step": 12639 }, { "epoch": 0.231049043084066, "grad_norm": 5.928863316211935, "learning_rate": 8.977111761792026e-06, "loss": 17.2507, "step": 12640 }, { "epoch": 0.23106732228051255, "grad_norm": 6.736219046219845, "learning_rate": 8.97693235539875e-06, "loss": 17.6348, "step": 12641 }, { "epoch": 0.23108560147695908, "grad_norm": 6.705627412403998, "learning_rate": 8.976752935066671e-06, "loss": 17.7984, "step": 12642 }, { "epoch": 0.2311038806734056, "grad_norm": 6.887482315364359, "learning_rate": 8.976573500796417e-06, "loss": 17.8859, "step": 12643 }, { "epoch": 0.23112215986985213, "grad_norm": 6.418883513819873, "learning_rate": 8.97639405258862e-06, "loss": 17.6612, "step": 12644 }, { "epoch": 0.23114043906629864, "grad_norm": 6.931046768284051, "learning_rate": 8.976214590443905e-06, "loss": 17.7548, "step": 12645 }, { "epoch": 0.23115871826274517, "grad_norm": 6.651831068192549, "learning_rate": 8.976035114362903e-06, "loss": 17.7041, "step": 12646 }, { "epoch": 0.2311769974591917, "grad_norm": 7.906315986151466, "learning_rate": 8.975855624346244e-06, "loss": 17.8006, "step": 12647 }, { "epoch": 0.23119527665563822, "grad_norm": 7.181775795765101, "learning_rate": 8.975676120394555e-06, "loss": 17.8929, "step": 12648 }, { "epoch": 0.23121355585208475, "grad_norm": 6.2908296422667185, "learning_rate": 8.975496602508467e-06, "loss": 17.3297, "step": 12649 }, { "epoch": 0.23123183504853126, "grad_norm": 7.118094861537474, "learning_rate": 8.975317070688608e-06, "loss": 17.6819, "step": 12650 }, { "epoch": 0.2312501142449778, "grad_norm": 7.577308961162696, "learning_rate": 8.975137524935609e-06, "loss": 17.8884, "step": 12651 }, { "epoch": 0.2312683934414243, "grad_norm": 6.652621699368609, "learning_rate": 8.974957965250097e-06, "loss": 17.7604, "step": 12652 }, { "epoch": 0.23128667263787084, "grad_norm": 6.353048449739577, "learning_rate": 8.9747783916327e-06, "loss": 17.4359, "step": 12653 }, { "epoch": 0.23130495183431737, "grad_norm": 6.765843695500798, "learning_rate": 8.974598804084052e-06, "loss": 17.5919, "step": 12654 }, { "epoch": 0.23132323103076388, "grad_norm": 8.23210520293349, "learning_rate": 8.97441920260478e-06, "loss": 18.1861, "step": 12655 }, { "epoch": 0.23134151022721042, "grad_norm": 5.642530187166705, "learning_rate": 8.974239587195514e-06, "loss": 17.0812, "step": 12656 }, { "epoch": 0.23135978942365693, "grad_norm": 5.990051907955578, "learning_rate": 8.974059957856882e-06, "loss": 17.1461, "step": 12657 }, { "epoch": 0.23137806862010346, "grad_norm": 7.530881392920648, "learning_rate": 8.973880314589516e-06, "loss": 17.6615, "step": 12658 }, { "epoch": 0.23139634781655, "grad_norm": 6.605208950411215, "learning_rate": 8.973700657394043e-06, "loss": 17.5248, "step": 12659 }, { "epoch": 0.2314146270129965, "grad_norm": 7.059490635933132, "learning_rate": 8.973520986271094e-06, "loss": 17.6861, "step": 12660 }, { "epoch": 0.23143290620944304, "grad_norm": 6.815923729239079, "learning_rate": 8.9733413012213e-06, "loss": 17.4846, "step": 12661 }, { "epoch": 0.23145118540588955, "grad_norm": 6.614975434214911, "learning_rate": 8.973161602245288e-06, "loss": 17.7974, "step": 12662 }, { "epoch": 0.23146946460233608, "grad_norm": 6.350078515902742, "learning_rate": 8.97298188934369e-06, "loss": 17.5161, "step": 12663 }, { "epoch": 0.23148774379878262, "grad_norm": 6.253468614428047, "learning_rate": 8.972802162517136e-06, "loss": 17.8805, "step": 12664 }, { "epoch": 0.23150602299522913, "grad_norm": 6.4332769519713775, "learning_rate": 8.972622421766254e-06, "loss": 17.5103, "step": 12665 }, { "epoch": 0.23152430219167566, "grad_norm": 7.082491712673213, "learning_rate": 8.972442667091676e-06, "loss": 17.6118, "step": 12666 }, { "epoch": 0.23154258138812217, "grad_norm": 5.952803943455232, "learning_rate": 8.97226289849403e-06, "loss": 17.4437, "step": 12667 }, { "epoch": 0.2315608605845687, "grad_norm": 5.789099746715334, "learning_rate": 8.972083115973949e-06, "loss": 17.1918, "step": 12668 }, { "epoch": 0.23157913978101521, "grad_norm": 7.577675757610278, "learning_rate": 8.97190331953206e-06, "loss": 17.6084, "step": 12669 }, { "epoch": 0.23159741897746175, "grad_norm": 5.860040349956942, "learning_rate": 8.971723509168996e-06, "loss": 17.5445, "step": 12670 }, { "epoch": 0.23161569817390829, "grad_norm": 6.469267679013469, "learning_rate": 8.971543684885384e-06, "loss": 17.7468, "step": 12671 }, { "epoch": 0.2316339773703548, "grad_norm": 5.457767309634818, "learning_rate": 8.971363846681858e-06, "loss": 17.0538, "step": 12672 }, { "epoch": 0.23165225656680133, "grad_norm": 7.300162018109881, "learning_rate": 8.971183994559046e-06, "loss": 17.9823, "step": 12673 }, { "epoch": 0.23167053576324784, "grad_norm": 6.09247248393594, "learning_rate": 8.971004128517577e-06, "loss": 17.6069, "step": 12674 }, { "epoch": 0.23168881495969437, "grad_norm": 7.876931662668772, "learning_rate": 8.970824248558083e-06, "loss": 17.938, "step": 12675 }, { "epoch": 0.2317070941561409, "grad_norm": 5.714717702916206, "learning_rate": 8.970644354681196e-06, "loss": 17.1884, "step": 12676 }, { "epoch": 0.23172537335258742, "grad_norm": 6.139455239961285, "learning_rate": 8.970464446887544e-06, "loss": 17.1756, "step": 12677 }, { "epoch": 0.23174365254903395, "grad_norm": 6.4457883195426415, "learning_rate": 8.97028452517776e-06, "loss": 17.4024, "step": 12678 }, { "epoch": 0.23176193174548046, "grad_norm": 10.106626906983957, "learning_rate": 8.970104589552472e-06, "loss": 19.3147, "step": 12679 }, { "epoch": 0.231780210941927, "grad_norm": 7.817700354434268, "learning_rate": 8.969924640012312e-06, "loss": 18.1592, "step": 12680 }, { "epoch": 0.23179849013837353, "grad_norm": 7.803055270135185, "learning_rate": 8.969744676557912e-06, "loss": 18.1815, "step": 12681 }, { "epoch": 0.23181676933482004, "grad_norm": 8.738677894192076, "learning_rate": 8.9695646991899e-06, "loss": 18.6096, "step": 12682 }, { "epoch": 0.23183504853126657, "grad_norm": 8.074642631159707, "learning_rate": 8.96938470790891e-06, "loss": 17.9484, "step": 12683 }, { "epoch": 0.23185332772771308, "grad_norm": 6.445153140159986, "learning_rate": 8.969204702715568e-06, "loss": 17.2538, "step": 12684 }, { "epoch": 0.23187160692415962, "grad_norm": 7.28801149522414, "learning_rate": 8.96902468361051e-06, "loss": 18.0793, "step": 12685 }, { "epoch": 0.23188988612060613, "grad_norm": 6.45430193524104, "learning_rate": 8.968844650594363e-06, "loss": 17.4716, "step": 12686 }, { "epoch": 0.23190816531705266, "grad_norm": 7.189783644839498, "learning_rate": 8.968664603667763e-06, "loss": 17.4625, "step": 12687 }, { "epoch": 0.2319264445134992, "grad_norm": 6.673588427535199, "learning_rate": 8.968484542831337e-06, "loss": 17.4997, "step": 12688 }, { "epoch": 0.2319447237099457, "grad_norm": 7.319510417789561, "learning_rate": 8.968304468085715e-06, "loss": 18.1252, "step": 12689 }, { "epoch": 0.23196300290639224, "grad_norm": 6.755870910300304, "learning_rate": 8.968124379431533e-06, "loss": 17.6274, "step": 12690 }, { "epoch": 0.23198128210283875, "grad_norm": 6.9792511913835655, "learning_rate": 8.967944276869419e-06, "loss": 17.5144, "step": 12691 }, { "epoch": 0.23199956129928528, "grad_norm": 6.2217243828616455, "learning_rate": 8.967764160400002e-06, "loss": 17.3082, "step": 12692 }, { "epoch": 0.23201784049573182, "grad_norm": 6.089639947574932, "learning_rate": 8.967584030023916e-06, "loss": 17.3434, "step": 12693 }, { "epoch": 0.23203611969217833, "grad_norm": 6.430558611785808, "learning_rate": 8.967403885741795e-06, "loss": 17.5896, "step": 12694 }, { "epoch": 0.23205439888862486, "grad_norm": 5.206884105273005, "learning_rate": 8.967223727554267e-06, "loss": 16.9609, "step": 12695 }, { "epoch": 0.23207267808507137, "grad_norm": 6.467597193668249, "learning_rate": 8.967043555461964e-06, "loss": 17.4015, "step": 12696 }, { "epoch": 0.2320909572815179, "grad_norm": 6.7907705601061235, "learning_rate": 8.966863369465517e-06, "loss": 17.4738, "step": 12697 }, { "epoch": 0.23210923647796444, "grad_norm": 7.911519043403179, "learning_rate": 8.966683169565557e-06, "loss": 18.1934, "step": 12698 }, { "epoch": 0.23212751567441095, "grad_norm": 6.4982645684128615, "learning_rate": 8.96650295576272e-06, "loss": 17.3533, "step": 12699 }, { "epoch": 0.23214579487085749, "grad_norm": 5.802643014414197, "learning_rate": 8.966322728057632e-06, "loss": 17.2984, "step": 12700 }, { "epoch": 0.232164074067304, "grad_norm": 6.9079188967253655, "learning_rate": 8.966142486450925e-06, "loss": 17.6317, "step": 12701 }, { "epoch": 0.23218235326375053, "grad_norm": 6.650105180734968, "learning_rate": 8.965962230943236e-06, "loss": 17.5343, "step": 12702 }, { "epoch": 0.23220063246019704, "grad_norm": 6.969674418237345, "learning_rate": 8.965781961535194e-06, "loss": 17.6164, "step": 12703 }, { "epoch": 0.23221891165664357, "grad_norm": 6.3892235768617915, "learning_rate": 8.96560167822743e-06, "loss": 17.4689, "step": 12704 }, { "epoch": 0.2322371908530901, "grad_norm": 6.387298192158086, "learning_rate": 8.965421381020573e-06, "loss": 17.2971, "step": 12705 }, { "epoch": 0.23225547004953662, "grad_norm": 9.097806062465553, "learning_rate": 8.965241069915262e-06, "loss": 18.1642, "step": 12706 }, { "epoch": 0.23227374924598315, "grad_norm": 7.635211232771337, "learning_rate": 8.965060744912123e-06, "loss": 18.0983, "step": 12707 }, { "epoch": 0.23229202844242966, "grad_norm": 6.649749231872817, "learning_rate": 8.96488040601179e-06, "loss": 17.6738, "step": 12708 }, { "epoch": 0.2323103076388762, "grad_norm": 8.254706481009299, "learning_rate": 8.964700053214896e-06, "loss": 17.9174, "step": 12709 }, { "epoch": 0.23232858683532273, "grad_norm": 7.083713670044754, "learning_rate": 8.964519686522073e-06, "loss": 17.7431, "step": 12710 }, { "epoch": 0.23234686603176924, "grad_norm": 6.654268352523479, "learning_rate": 8.964339305933952e-06, "loss": 17.5484, "step": 12711 }, { "epoch": 0.23236514522821577, "grad_norm": 7.396280795160867, "learning_rate": 8.964158911451165e-06, "loss": 17.6135, "step": 12712 }, { "epoch": 0.23238342442466228, "grad_norm": 6.058802860490418, "learning_rate": 8.963978503074345e-06, "loss": 17.4924, "step": 12713 }, { "epoch": 0.23240170362110882, "grad_norm": 6.604578914598495, "learning_rate": 8.963798080804126e-06, "loss": 17.426, "step": 12714 }, { "epoch": 0.23241998281755535, "grad_norm": 6.19249065287721, "learning_rate": 8.963617644641138e-06, "loss": 17.308, "step": 12715 }, { "epoch": 0.23243826201400186, "grad_norm": 6.900919621465627, "learning_rate": 8.963437194586013e-06, "loss": 17.8536, "step": 12716 }, { "epoch": 0.2324565412104484, "grad_norm": 6.7578084758593, "learning_rate": 8.963256730639384e-06, "loss": 17.2981, "step": 12717 }, { "epoch": 0.2324748204068949, "grad_norm": 6.030502383830794, "learning_rate": 8.963076252801886e-06, "loss": 17.32, "step": 12718 }, { "epoch": 0.23249309960334144, "grad_norm": 7.2092960685317555, "learning_rate": 8.96289576107415e-06, "loss": 17.8071, "step": 12719 }, { "epoch": 0.23251137879978795, "grad_norm": 6.50642651713053, "learning_rate": 8.962715255456806e-06, "loss": 17.6434, "step": 12720 }, { "epoch": 0.23252965799623448, "grad_norm": 8.128287658462336, "learning_rate": 8.96253473595049e-06, "loss": 17.6088, "step": 12721 }, { "epoch": 0.23254793719268102, "grad_norm": 6.46303576738472, "learning_rate": 8.962354202555834e-06, "loss": 17.3952, "step": 12722 }, { "epoch": 0.23256621638912753, "grad_norm": 8.03783768046001, "learning_rate": 8.96217365527347e-06, "loss": 18.1464, "step": 12723 }, { "epoch": 0.23258449558557406, "grad_norm": 6.817841189634383, "learning_rate": 8.961993094104031e-06, "loss": 17.5995, "step": 12724 }, { "epoch": 0.23260277478202057, "grad_norm": 6.97774497101537, "learning_rate": 8.96181251904815e-06, "loss": 17.9206, "step": 12725 }, { "epoch": 0.2326210539784671, "grad_norm": 7.485176546420922, "learning_rate": 8.96163193010646e-06, "loss": 18.1183, "step": 12726 }, { "epoch": 0.23263933317491364, "grad_norm": 7.727677566925891, "learning_rate": 8.961451327279595e-06, "loss": 17.796, "step": 12727 }, { "epoch": 0.23265761237136015, "grad_norm": 7.2993677626770115, "learning_rate": 8.961270710568185e-06, "loss": 17.8907, "step": 12728 }, { "epoch": 0.2326758915678067, "grad_norm": 7.207807208792943, "learning_rate": 8.961090079972865e-06, "loss": 17.7457, "step": 12729 }, { "epoch": 0.2326941707642532, "grad_norm": 7.123250316362162, "learning_rate": 8.960909435494269e-06, "loss": 17.9557, "step": 12730 }, { "epoch": 0.23271244996069973, "grad_norm": 5.795406937156422, "learning_rate": 8.96072877713303e-06, "loss": 17.187, "step": 12731 }, { "epoch": 0.23273072915714627, "grad_norm": 6.060058257382089, "learning_rate": 8.960548104889778e-06, "loss": 17.377, "step": 12732 }, { "epoch": 0.23274900835359277, "grad_norm": 9.463240980061615, "learning_rate": 8.960367418765152e-06, "loss": 18.9757, "step": 12733 }, { "epoch": 0.2327672875500393, "grad_norm": 7.1875089109188535, "learning_rate": 8.960186718759778e-06, "loss": 17.7208, "step": 12734 }, { "epoch": 0.23278556674648582, "grad_norm": 9.197511583933995, "learning_rate": 8.960006004874295e-06, "loss": 18.0441, "step": 12735 }, { "epoch": 0.23280384594293235, "grad_norm": 7.426393746853364, "learning_rate": 8.959825277109334e-06, "loss": 18.1336, "step": 12736 }, { "epoch": 0.23282212513937886, "grad_norm": 4.9142180878185915, "learning_rate": 8.95964453546553e-06, "loss": 16.8768, "step": 12737 }, { "epoch": 0.2328404043358254, "grad_norm": 7.249341077977238, "learning_rate": 8.959463779943516e-06, "loss": 17.856, "step": 12738 }, { "epoch": 0.23285868353227193, "grad_norm": 6.544874946263931, "learning_rate": 8.959283010543923e-06, "loss": 17.4914, "step": 12739 }, { "epoch": 0.23287696272871844, "grad_norm": 6.973432445210602, "learning_rate": 8.959102227267387e-06, "loss": 17.5418, "step": 12740 }, { "epoch": 0.23289524192516498, "grad_norm": 7.673467131029792, "learning_rate": 8.958921430114542e-06, "loss": 18.4472, "step": 12741 }, { "epoch": 0.23291352112161148, "grad_norm": 6.957441134901725, "learning_rate": 8.95874061908602e-06, "loss": 17.5476, "step": 12742 }, { "epoch": 0.23293180031805802, "grad_norm": 7.476236583511359, "learning_rate": 8.958559794182457e-06, "loss": 18.1306, "step": 12743 }, { "epoch": 0.23295007951450455, "grad_norm": 7.373016572378293, "learning_rate": 8.958378955404486e-06, "loss": 17.622, "step": 12744 }, { "epoch": 0.23296835871095106, "grad_norm": 7.485544217058992, "learning_rate": 8.95819810275274e-06, "loss": 17.9682, "step": 12745 }, { "epoch": 0.2329866379073976, "grad_norm": 6.912468244774014, "learning_rate": 8.958017236227851e-06, "loss": 17.6617, "step": 12746 }, { "epoch": 0.2330049171038441, "grad_norm": 6.7659822035034, "learning_rate": 8.957836355830456e-06, "loss": 17.5976, "step": 12747 }, { "epoch": 0.23302319630029064, "grad_norm": 6.703200527664176, "learning_rate": 8.957655461561188e-06, "loss": 17.5962, "step": 12748 }, { "epoch": 0.23304147549673718, "grad_norm": 6.790172140328692, "learning_rate": 8.957474553420681e-06, "loss": 17.5057, "step": 12749 }, { "epoch": 0.23305975469318368, "grad_norm": 8.478019310612417, "learning_rate": 8.957293631409571e-06, "loss": 18.1948, "step": 12750 }, { "epoch": 0.23307803388963022, "grad_norm": 6.657350113306031, "learning_rate": 8.95711269552849e-06, "loss": 17.4076, "step": 12751 }, { "epoch": 0.23309631308607673, "grad_norm": 6.443575958162277, "learning_rate": 8.95693174577807e-06, "loss": 17.593, "step": 12752 }, { "epoch": 0.23311459228252326, "grad_norm": 5.822279201249806, "learning_rate": 8.956750782158948e-06, "loss": 17.2569, "step": 12753 }, { "epoch": 0.23313287147896977, "grad_norm": 5.829004248004184, "learning_rate": 8.956569804671759e-06, "loss": 17.3094, "step": 12754 }, { "epoch": 0.2331511506754163, "grad_norm": 6.440022187282789, "learning_rate": 8.956388813317136e-06, "loss": 17.5126, "step": 12755 }, { "epoch": 0.23316942987186284, "grad_norm": 5.129374670689684, "learning_rate": 8.956207808095713e-06, "loss": 16.9751, "step": 12756 }, { "epoch": 0.23318770906830935, "grad_norm": 7.600104472662688, "learning_rate": 8.956026789008126e-06, "loss": 18.2364, "step": 12757 }, { "epoch": 0.2332059882647559, "grad_norm": 6.964699187721291, "learning_rate": 8.955845756055007e-06, "loss": 17.9402, "step": 12758 }, { "epoch": 0.2332242674612024, "grad_norm": 6.181002011419887, "learning_rate": 8.955664709236992e-06, "loss": 17.3155, "step": 12759 }, { "epoch": 0.23324254665764893, "grad_norm": 6.814603945740874, "learning_rate": 8.955483648554716e-06, "loss": 17.8268, "step": 12760 }, { "epoch": 0.23326082585409547, "grad_norm": 7.091746026402167, "learning_rate": 8.955302574008813e-06, "loss": 17.8012, "step": 12761 }, { "epoch": 0.23327910505054197, "grad_norm": 8.211456720586112, "learning_rate": 8.955121485599919e-06, "loss": 18.3147, "step": 12762 }, { "epoch": 0.2332973842469885, "grad_norm": 6.807629775205399, "learning_rate": 8.954940383328666e-06, "loss": 17.7392, "step": 12763 }, { "epoch": 0.23331566344343502, "grad_norm": 6.383849031235497, "learning_rate": 8.95475926719569e-06, "loss": 17.5924, "step": 12764 }, { "epoch": 0.23333394263988155, "grad_norm": 6.147107205787592, "learning_rate": 8.954578137201625e-06, "loss": 17.4159, "step": 12765 }, { "epoch": 0.2333522218363281, "grad_norm": 7.702299531053069, "learning_rate": 8.954396993347107e-06, "loss": 17.5864, "step": 12766 }, { "epoch": 0.2333705010327746, "grad_norm": 7.270374554348164, "learning_rate": 8.954215835632774e-06, "loss": 17.8174, "step": 12767 }, { "epoch": 0.23338878022922113, "grad_norm": 7.072935465236767, "learning_rate": 8.954034664059254e-06, "loss": 18.1406, "step": 12768 }, { "epoch": 0.23340705942566764, "grad_norm": 6.522551304604889, "learning_rate": 8.953853478627187e-06, "loss": 17.4885, "step": 12769 }, { "epoch": 0.23342533862211418, "grad_norm": 7.153524358540162, "learning_rate": 8.953672279337206e-06, "loss": 17.5572, "step": 12770 }, { "epoch": 0.23344361781856068, "grad_norm": 7.474197906048436, "learning_rate": 8.953491066189948e-06, "loss": 17.7423, "step": 12771 }, { "epoch": 0.23346189701500722, "grad_norm": 6.416496933823563, "learning_rate": 8.953309839186047e-06, "loss": 17.5954, "step": 12772 }, { "epoch": 0.23348017621145375, "grad_norm": 6.322843043443495, "learning_rate": 8.953128598326136e-06, "loss": 17.4895, "step": 12773 }, { "epoch": 0.23349845540790026, "grad_norm": 6.33223118161616, "learning_rate": 8.952947343610854e-06, "loss": 17.1197, "step": 12774 }, { "epoch": 0.2335167346043468, "grad_norm": 7.066635935635524, "learning_rate": 8.952766075040833e-06, "loss": 17.8293, "step": 12775 }, { "epoch": 0.2335350138007933, "grad_norm": 5.944662456169544, "learning_rate": 8.952584792616712e-06, "loss": 17.1289, "step": 12776 }, { "epoch": 0.23355329299723984, "grad_norm": 6.3679384570264554, "learning_rate": 8.952403496339124e-06, "loss": 17.5574, "step": 12777 }, { "epoch": 0.23357157219368638, "grad_norm": 6.424547901175277, "learning_rate": 8.952222186208703e-06, "loss": 17.4749, "step": 12778 }, { "epoch": 0.23358985139013289, "grad_norm": 7.124514106204784, "learning_rate": 8.952040862226089e-06, "loss": 17.9767, "step": 12779 }, { "epoch": 0.23360813058657942, "grad_norm": 5.489985558523886, "learning_rate": 8.951859524391912e-06, "loss": 17.1535, "step": 12780 }, { "epoch": 0.23362640978302593, "grad_norm": 6.904053710838957, "learning_rate": 8.95167817270681e-06, "loss": 17.8272, "step": 12781 }, { "epoch": 0.23364468897947246, "grad_norm": 7.261939271079995, "learning_rate": 8.95149680717142e-06, "loss": 17.7751, "step": 12782 }, { "epoch": 0.233662968175919, "grad_norm": 8.35605896603481, "learning_rate": 8.951315427786378e-06, "loss": 18.2255, "step": 12783 }, { "epoch": 0.2336812473723655, "grad_norm": 6.444609110134202, "learning_rate": 8.951134034552316e-06, "loss": 17.2849, "step": 12784 }, { "epoch": 0.23369952656881204, "grad_norm": 7.343231371580179, "learning_rate": 8.950952627469873e-06, "loss": 17.8932, "step": 12785 }, { "epoch": 0.23371780576525855, "grad_norm": 6.195981454310583, "learning_rate": 8.950771206539685e-06, "loss": 17.4204, "step": 12786 }, { "epoch": 0.2337360849617051, "grad_norm": 6.676724333091879, "learning_rate": 8.950589771762386e-06, "loss": 17.728, "step": 12787 }, { "epoch": 0.2337543641581516, "grad_norm": 7.8657389649096645, "learning_rate": 8.950408323138612e-06, "loss": 17.8626, "step": 12788 }, { "epoch": 0.23377264335459813, "grad_norm": 6.928884863418598, "learning_rate": 8.950226860669001e-06, "loss": 17.6, "step": 12789 }, { "epoch": 0.23379092255104467, "grad_norm": 5.847325564075314, "learning_rate": 8.950045384354185e-06, "loss": 17.2806, "step": 12790 }, { "epoch": 0.23380920174749117, "grad_norm": 7.902027079744283, "learning_rate": 8.949863894194806e-06, "loss": 18.4469, "step": 12791 }, { "epoch": 0.2338274809439377, "grad_norm": 6.5891202226334515, "learning_rate": 8.949682390191495e-06, "loss": 17.6076, "step": 12792 }, { "epoch": 0.23384576014038422, "grad_norm": 9.77565731334052, "learning_rate": 8.94950087234489e-06, "loss": 17.8782, "step": 12793 }, { "epoch": 0.23386403933683075, "grad_norm": 5.912218694025467, "learning_rate": 8.949319340655628e-06, "loss": 17.1442, "step": 12794 }, { "epoch": 0.2338823185332773, "grad_norm": 6.861787325914959, "learning_rate": 8.949137795124342e-06, "loss": 17.6416, "step": 12795 }, { "epoch": 0.2339005977297238, "grad_norm": 7.681415121848571, "learning_rate": 8.948956235751673e-06, "loss": 18.4279, "step": 12796 }, { "epoch": 0.23391887692617033, "grad_norm": 6.311152196060109, "learning_rate": 8.948774662538255e-06, "loss": 17.3943, "step": 12797 }, { "epoch": 0.23393715612261684, "grad_norm": 5.858300943469095, "learning_rate": 8.948593075484724e-06, "loss": 17.4344, "step": 12798 }, { "epoch": 0.23395543531906338, "grad_norm": 7.530169804622749, "learning_rate": 8.948411474591716e-06, "loss": 18.3767, "step": 12799 }, { "epoch": 0.2339737145155099, "grad_norm": 5.9681224194622615, "learning_rate": 8.94822985985987e-06, "loss": 17.1825, "step": 12800 }, { "epoch": 0.23399199371195642, "grad_norm": 8.049380654632524, "learning_rate": 8.948048231289822e-06, "loss": 18.1197, "step": 12801 }, { "epoch": 0.23401027290840296, "grad_norm": 5.58038187881301, "learning_rate": 8.947866588882204e-06, "loss": 16.7757, "step": 12802 }, { "epoch": 0.23402855210484946, "grad_norm": 5.958354439439862, "learning_rate": 8.947684932637658e-06, "loss": 17.3348, "step": 12803 }, { "epoch": 0.234046831301296, "grad_norm": 6.689782861703746, "learning_rate": 8.947503262556819e-06, "loss": 17.3113, "step": 12804 }, { "epoch": 0.2340651104977425, "grad_norm": 6.538472801654387, "learning_rate": 8.947321578640323e-06, "loss": 17.3539, "step": 12805 }, { "epoch": 0.23408338969418904, "grad_norm": 7.123948310786515, "learning_rate": 8.94713988088881e-06, "loss": 18.0318, "step": 12806 }, { "epoch": 0.23410166889063558, "grad_norm": 7.1576811407236915, "learning_rate": 8.94695816930291e-06, "loss": 17.4988, "step": 12807 }, { "epoch": 0.23411994808708209, "grad_norm": 6.831561770504608, "learning_rate": 8.946776443883267e-06, "loss": 17.8309, "step": 12808 }, { "epoch": 0.23413822728352862, "grad_norm": 6.5033539221865455, "learning_rate": 8.946594704630514e-06, "loss": 17.8588, "step": 12809 }, { "epoch": 0.23415650647997513, "grad_norm": 8.120063990024946, "learning_rate": 8.946412951545289e-06, "loss": 17.649, "step": 12810 }, { "epoch": 0.23417478567642167, "grad_norm": 6.680866550908306, "learning_rate": 8.94623118462823e-06, "loss": 17.6354, "step": 12811 }, { "epoch": 0.2341930648728682, "grad_norm": 6.258034552886107, "learning_rate": 8.946049403879973e-06, "loss": 17.4059, "step": 12812 }, { "epoch": 0.2342113440693147, "grad_norm": 6.586197990355327, "learning_rate": 8.945867609301153e-06, "loss": 17.6528, "step": 12813 }, { "epoch": 0.23422962326576124, "grad_norm": 7.588036149324091, "learning_rate": 8.945685800892412e-06, "loss": 18.1779, "step": 12814 }, { "epoch": 0.23424790246220775, "grad_norm": 8.872585606885204, "learning_rate": 8.945503978654384e-06, "loss": 16.9234, "step": 12815 }, { "epoch": 0.2342661816586543, "grad_norm": 6.366790588751804, "learning_rate": 8.945322142587706e-06, "loss": 17.4695, "step": 12816 }, { "epoch": 0.23428446085510082, "grad_norm": 6.534386797969441, "learning_rate": 8.945140292693017e-06, "loss": 17.5107, "step": 12817 }, { "epoch": 0.23430274005154733, "grad_norm": 6.230167768905743, "learning_rate": 8.944958428970954e-06, "loss": 17.4147, "step": 12818 }, { "epoch": 0.23432101924799387, "grad_norm": 5.579383110813056, "learning_rate": 8.944776551422154e-06, "loss": 17.3322, "step": 12819 }, { "epoch": 0.23433929844444037, "grad_norm": 6.323520705722305, "learning_rate": 8.944594660047254e-06, "loss": 17.1582, "step": 12820 }, { "epoch": 0.2343575776408869, "grad_norm": 6.766699744550753, "learning_rate": 8.944412754846892e-06, "loss": 17.4968, "step": 12821 }, { "epoch": 0.23437585683733342, "grad_norm": 6.279371910677322, "learning_rate": 8.944230835821706e-06, "loss": 17.4284, "step": 12822 }, { "epoch": 0.23439413603377995, "grad_norm": 7.40148709909635, "learning_rate": 8.944048902972334e-06, "loss": 18.0285, "step": 12823 }, { "epoch": 0.2344124152302265, "grad_norm": 7.320197020027127, "learning_rate": 8.943866956299413e-06, "loss": 17.9752, "step": 12824 }, { "epoch": 0.234430694426673, "grad_norm": 6.236710783299506, "learning_rate": 8.943684995803578e-06, "loss": 17.4248, "step": 12825 }, { "epoch": 0.23444897362311953, "grad_norm": 7.525352570988221, "learning_rate": 8.943503021485472e-06, "loss": 18.0884, "step": 12826 }, { "epoch": 0.23446725281956604, "grad_norm": 6.153165647566515, "learning_rate": 8.943321033345726e-06, "loss": 17.4561, "step": 12827 }, { "epoch": 0.23448553201601258, "grad_norm": 6.773943377440282, "learning_rate": 8.943139031384986e-06, "loss": 17.6015, "step": 12828 }, { "epoch": 0.2345038112124591, "grad_norm": 6.637106740298016, "learning_rate": 8.942957015603883e-06, "loss": 17.4788, "step": 12829 }, { "epoch": 0.23452209040890562, "grad_norm": 5.7709940820779, "learning_rate": 8.942774986003062e-06, "loss": 17.2076, "step": 12830 }, { "epoch": 0.23454036960535216, "grad_norm": 7.194045688760218, "learning_rate": 8.942592942583152e-06, "loss": 18.0743, "step": 12831 }, { "epoch": 0.23455864880179866, "grad_norm": 8.773155649550077, "learning_rate": 8.942410885344798e-06, "loss": 18.5074, "step": 12832 }, { "epoch": 0.2345769279982452, "grad_norm": 6.372220643277621, "learning_rate": 8.942228814288638e-06, "loss": 17.4747, "step": 12833 }, { "epoch": 0.23459520719469173, "grad_norm": 7.278206166951027, "learning_rate": 8.942046729415305e-06, "loss": 17.9158, "step": 12834 }, { "epoch": 0.23461348639113824, "grad_norm": 8.174325706353997, "learning_rate": 8.941864630725442e-06, "loss": 18.0042, "step": 12835 }, { "epoch": 0.23463176558758478, "grad_norm": 6.661736661078449, "learning_rate": 8.941682518219685e-06, "loss": 17.703, "step": 12836 }, { "epoch": 0.2346500447840313, "grad_norm": 6.85330602778627, "learning_rate": 8.941500391898672e-06, "loss": 17.486, "step": 12837 }, { "epoch": 0.23466832398047782, "grad_norm": 6.094453699887702, "learning_rate": 8.941318251763043e-06, "loss": 17.5093, "step": 12838 }, { "epoch": 0.23468660317692433, "grad_norm": 6.089287552241428, "learning_rate": 8.941136097813437e-06, "loss": 17.4301, "step": 12839 }, { "epoch": 0.23470488237337087, "grad_norm": 6.786967476713644, "learning_rate": 8.940953930050488e-06, "loss": 17.8137, "step": 12840 }, { "epoch": 0.2347231615698174, "grad_norm": 6.399359306518545, "learning_rate": 8.94077174847484e-06, "loss": 17.1748, "step": 12841 }, { "epoch": 0.2347414407662639, "grad_norm": 5.57518412161416, "learning_rate": 8.940589553087128e-06, "loss": 17.1147, "step": 12842 }, { "epoch": 0.23475971996271044, "grad_norm": 5.686589382383859, "learning_rate": 8.940407343887991e-06, "loss": 17.2473, "step": 12843 }, { "epoch": 0.23477799915915695, "grad_norm": 7.324812993600869, "learning_rate": 8.940225120878069e-06, "loss": 17.9225, "step": 12844 }, { "epoch": 0.2347962783556035, "grad_norm": 6.808685104709658, "learning_rate": 8.940042884058e-06, "loss": 17.626, "step": 12845 }, { "epoch": 0.23481455755205002, "grad_norm": 7.521610994948991, "learning_rate": 8.93986063342842e-06, "loss": 17.8172, "step": 12846 }, { "epoch": 0.23483283674849653, "grad_norm": 6.801419585653584, "learning_rate": 8.939678368989973e-06, "loss": 17.6693, "step": 12847 }, { "epoch": 0.23485111594494307, "grad_norm": 6.844620130150307, "learning_rate": 8.939496090743296e-06, "loss": 17.5882, "step": 12848 }, { "epoch": 0.23486939514138958, "grad_norm": 7.224860326249475, "learning_rate": 8.939313798689026e-06, "loss": 17.6952, "step": 12849 }, { "epoch": 0.2348876743378361, "grad_norm": 7.1950922179245005, "learning_rate": 8.939131492827801e-06, "loss": 17.9068, "step": 12850 }, { "epoch": 0.23490595353428265, "grad_norm": 7.036529686621534, "learning_rate": 8.938949173160266e-06, "loss": 17.6325, "step": 12851 }, { "epoch": 0.23492423273072915, "grad_norm": 5.548452549926464, "learning_rate": 8.938766839687053e-06, "loss": 16.9409, "step": 12852 }, { "epoch": 0.2349425119271757, "grad_norm": 6.460058088370336, "learning_rate": 8.938584492408805e-06, "loss": 17.7953, "step": 12853 }, { "epoch": 0.2349607911236222, "grad_norm": 5.865582744887052, "learning_rate": 8.938402131326158e-06, "loss": 17.3237, "step": 12854 }, { "epoch": 0.23497907032006873, "grad_norm": 5.507687234652714, "learning_rate": 8.938219756439755e-06, "loss": 16.9353, "step": 12855 }, { "epoch": 0.23499734951651524, "grad_norm": 6.740356373881581, "learning_rate": 8.938037367750234e-06, "loss": 17.8141, "step": 12856 }, { "epoch": 0.23501562871296178, "grad_norm": 8.740237844071164, "learning_rate": 8.937854965258234e-06, "loss": 18.3672, "step": 12857 }, { "epoch": 0.2350339079094083, "grad_norm": 6.841865049004293, "learning_rate": 8.937672548964394e-06, "loss": 17.5426, "step": 12858 }, { "epoch": 0.23505218710585482, "grad_norm": 6.5395683840692, "learning_rate": 8.937490118869353e-06, "loss": 17.7061, "step": 12859 }, { "epoch": 0.23507046630230136, "grad_norm": 7.60807010734477, "learning_rate": 8.93730767497375e-06, "loss": 17.8479, "step": 12860 }, { "epoch": 0.23508874549874786, "grad_norm": 8.067536951243808, "learning_rate": 8.937125217278225e-06, "loss": 17.991, "step": 12861 }, { "epoch": 0.2351070246951944, "grad_norm": 9.064851064343651, "learning_rate": 8.936942745783419e-06, "loss": 18.023, "step": 12862 }, { "epoch": 0.23512530389164094, "grad_norm": 6.907223170853111, "learning_rate": 8.93676026048997e-06, "loss": 17.6457, "step": 12863 }, { "epoch": 0.23514358308808744, "grad_norm": 6.0732925254634695, "learning_rate": 8.936577761398517e-06, "loss": 17.3544, "step": 12864 }, { "epoch": 0.23516186228453398, "grad_norm": 6.541697149923802, "learning_rate": 8.936395248509701e-06, "loss": 17.4098, "step": 12865 }, { "epoch": 0.2351801414809805, "grad_norm": 7.675510136049659, "learning_rate": 8.936212721824163e-06, "loss": 18.1187, "step": 12866 }, { "epoch": 0.23519842067742702, "grad_norm": 5.363468558491413, "learning_rate": 8.936030181342538e-06, "loss": 17.0235, "step": 12867 }, { "epoch": 0.23521669987387356, "grad_norm": 7.878788765603472, "learning_rate": 8.93584762706547e-06, "loss": 18.066, "step": 12868 }, { "epoch": 0.23523497907032007, "grad_norm": 6.932014476267707, "learning_rate": 8.9356650589936e-06, "loss": 17.7187, "step": 12869 }, { "epoch": 0.2352532582667666, "grad_norm": 7.661415455649792, "learning_rate": 8.935482477127562e-06, "loss": 18.097, "step": 12870 }, { "epoch": 0.2352715374632131, "grad_norm": 6.603252898003402, "learning_rate": 8.935299881468e-06, "loss": 17.6628, "step": 12871 }, { "epoch": 0.23528981665965965, "grad_norm": 7.8976845859302935, "learning_rate": 8.935117272015556e-06, "loss": 17.9888, "step": 12872 }, { "epoch": 0.23530809585610615, "grad_norm": 6.789037235858639, "learning_rate": 8.934934648770865e-06, "loss": 17.3038, "step": 12873 }, { "epoch": 0.2353263750525527, "grad_norm": 7.693775745814941, "learning_rate": 8.93475201173457e-06, "loss": 17.7252, "step": 12874 }, { "epoch": 0.23534465424899922, "grad_norm": 6.818681729605643, "learning_rate": 8.934569360907311e-06, "loss": 17.5436, "step": 12875 }, { "epoch": 0.23536293344544573, "grad_norm": 5.904524982784955, "learning_rate": 8.934386696289728e-06, "loss": 17.2418, "step": 12876 }, { "epoch": 0.23538121264189227, "grad_norm": 6.5986397666194, "learning_rate": 8.93420401788246e-06, "loss": 17.5913, "step": 12877 }, { "epoch": 0.23539949183833878, "grad_norm": 6.815460505642056, "learning_rate": 8.934021325686149e-06, "loss": 17.6721, "step": 12878 }, { "epoch": 0.2354177710347853, "grad_norm": 6.401695212403851, "learning_rate": 8.933838619701435e-06, "loss": 17.3721, "step": 12879 }, { "epoch": 0.23543605023123185, "grad_norm": 8.346930827955845, "learning_rate": 8.933655899928958e-06, "loss": 18.7162, "step": 12880 }, { "epoch": 0.23545432942767835, "grad_norm": 7.052508918572586, "learning_rate": 8.933473166369358e-06, "loss": 18.0449, "step": 12881 }, { "epoch": 0.2354726086241249, "grad_norm": 8.469230945531697, "learning_rate": 8.933290419023276e-06, "loss": 17.8177, "step": 12882 }, { "epoch": 0.2354908878205714, "grad_norm": 4.87375639354391, "learning_rate": 8.933107657891352e-06, "loss": 16.851, "step": 12883 }, { "epoch": 0.23550916701701793, "grad_norm": 6.521544436001019, "learning_rate": 8.932924882974228e-06, "loss": 17.6313, "step": 12884 }, { "epoch": 0.23552744621346447, "grad_norm": 6.171467419393666, "learning_rate": 8.932742094272541e-06, "loss": 17.2186, "step": 12885 }, { "epoch": 0.23554572540991098, "grad_norm": 7.055093229422603, "learning_rate": 8.932559291786937e-06, "loss": 17.8389, "step": 12886 }, { "epoch": 0.2355640046063575, "grad_norm": 6.894632962673046, "learning_rate": 8.932376475518054e-06, "loss": 17.9723, "step": 12887 }, { "epoch": 0.23558228380280402, "grad_norm": 6.846858043183039, "learning_rate": 8.932193645466531e-06, "loss": 17.7025, "step": 12888 }, { "epoch": 0.23560056299925056, "grad_norm": 7.236398562057061, "learning_rate": 8.93201080163301e-06, "loss": 17.7761, "step": 12889 }, { "epoch": 0.23561884219569706, "grad_norm": 7.315558731868937, "learning_rate": 8.931827944018134e-06, "loss": 18.054, "step": 12890 }, { "epoch": 0.2356371213921436, "grad_norm": 8.255749287326358, "learning_rate": 8.931645072622544e-06, "loss": 18.0403, "step": 12891 }, { "epoch": 0.23565540058859014, "grad_norm": 6.367375865660743, "learning_rate": 8.931462187446875e-06, "loss": 17.4986, "step": 12892 }, { "epoch": 0.23567367978503664, "grad_norm": 7.502225720027451, "learning_rate": 8.931279288491774e-06, "loss": 17.772, "step": 12893 }, { "epoch": 0.23569195898148318, "grad_norm": 5.609192326773627, "learning_rate": 8.931096375757882e-06, "loss": 17.3737, "step": 12894 }, { "epoch": 0.2357102381779297, "grad_norm": 8.235533754702457, "learning_rate": 8.930913449245836e-06, "loss": 18.11, "step": 12895 }, { "epoch": 0.23572851737437622, "grad_norm": 6.837312824543016, "learning_rate": 8.93073050895628e-06, "loss": 17.6328, "step": 12896 }, { "epoch": 0.23574679657082276, "grad_norm": 6.357263898915531, "learning_rate": 8.930547554889854e-06, "loss": 17.4212, "step": 12897 }, { "epoch": 0.23576507576726927, "grad_norm": 6.196444524082612, "learning_rate": 8.930364587047202e-06, "loss": 17.5793, "step": 12898 }, { "epoch": 0.2357833549637158, "grad_norm": 6.513390266384138, "learning_rate": 8.930181605428962e-06, "loss": 17.5772, "step": 12899 }, { "epoch": 0.2358016341601623, "grad_norm": 7.346719368535289, "learning_rate": 8.929998610035777e-06, "loss": 17.6624, "step": 12900 }, { "epoch": 0.23581991335660885, "grad_norm": 6.863613585163401, "learning_rate": 8.929815600868286e-06, "loss": 17.5932, "step": 12901 }, { "epoch": 0.23583819255305538, "grad_norm": 6.334064267736984, "learning_rate": 8.929632577927133e-06, "loss": 17.518, "step": 12902 }, { "epoch": 0.2358564717495019, "grad_norm": 7.053893819976255, "learning_rate": 8.92944954121296e-06, "loss": 17.9189, "step": 12903 }, { "epoch": 0.23587475094594842, "grad_norm": 7.980046746522379, "learning_rate": 8.929266490726408e-06, "loss": 17.9997, "step": 12904 }, { "epoch": 0.23589303014239493, "grad_norm": 6.962348750119907, "learning_rate": 8.929083426468117e-06, "loss": 17.6001, "step": 12905 }, { "epoch": 0.23591130933884147, "grad_norm": 7.442237121091488, "learning_rate": 8.92890034843873e-06, "loss": 17.8534, "step": 12906 }, { "epoch": 0.23592958853528798, "grad_norm": 6.388804993595424, "learning_rate": 8.928717256638887e-06, "loss": 17.6104, "step": 12907 }, { "epoch": 0.2359478677317345, "grad_norm": 5.611522836208369, "learning_rate": 8.928534151069231e-06, "loss": 17.0817, "step": 12908 }, { "epoch": 0.23596614692818105, "grad_norm": 7.23191169122668, "learning_rate": 8.928351031730405e-06, "loss": 17.8185, "step": 12909 }, { "epoch": 0.23598442612462756, "grad_norm": 6.204385501247323, "learning_rate": 8.928167898623048e-06, "loss": 17.4054, "step": 12910 }, { "epoch": 0.2360027053210741, "grad_norm": 8.513596232673757, "learning_rate": 8.927984751747805e-06, "loss": 18.1255, "step": 12911 }, { "epoch": 0.2360209845175206, "grad_norm": 6.8787495159890035, "learning_rate": 8.927801591105314e-06, "loss": 17.8178, "step": 12912 }, { "epoch": 0.23603926371396713, "grad_norm": 7.396079286809878, "learning_rate": 8.92761841669622e-06, "loss": 17.8402, "step": 12913 }, { "epoch": 0.23605754291041367, "grad_norm": 6.512736366078796, "learning_rate": 8.927435228521166e-06, "loss": 17.4054, "step": 12914 }, { "epoch": 0.23607582210686018, "grad_norm": 7.197978482763735, "learning_rate": 8.92725202658079e-06, "loss": 17.8006, "step": 12915 }, { "epoch": 0.2360941013033067, "grad_norm": 6.898895637699667, "learning_rate": 8.927068810875739e-06, "loss": 17.586, "step": 12916 }, { "epoch": 0.23611238049975322, "grad_norm": 6.525968119741383, "learning_rate": 8.92688558140665e-06, "loss": 17.5909, "step": 12917 }, { "epoch": 0.23613065969619976, "grad_norm": 7.4227272985848565, "learning_rate": 8.92670233817417e-06, "loss": 17.6597, "step": 12918 }, { "epoch": 0.2361489388926463, "grad_norm": 5.584828933954418, "learning_rate": 8.926519081178938e-06, "loss": 17.2085, "step": 12919 }, { "epoch": 0.2361672180890928, "grad_norm": 6.658550046066718, "learning_rate": 8.926335810421598e-06, "loss": 17.8171, "step": 12920 }, { "epoch": 0.23618549728553934, "grad_norm": 7.349185977238887, "learning_rate": 8.926152525902792e-06, "loss": 18.0539, "step": 12921 }, { "epoch": 0.23620377648198584, "grad_norm": 6.463275205700063, "learning_rate": 8.92596922762316e-06, "loss": 17.5902, "step": 12922 }, { "epoch": 0.23622205567843238, "grad_norm": 8.628181653084809, "learning_rate": 8.925785915583348e-06, "loss": 17.5339, "step": 12923 }, { "epoch": 0.2362403348748789, "grad_norm": 8.540834670203148, "learning_rate": 8.925602589783996e-06, "loss": 18.7885, "step": 12924 }, { "epoch": 0.23625861407132542, "grad_norm": 5.297173130657822, "learning_rate": 8.925419250225748e-06, "loss": 16.9093, "step": 12925 }, { "epoch": 0.23627689326777196, "grad_norm": 6.726903272632686, "learning_rate": 8.925235896909249e-06, "loss": 17.4664, "step": 12926 }, { "epoch": 0.23629517246421847, "grad_norm": 6.332665712850595, "learning_rate": 8.925052529835135e-06, "loss": 17.659, "step": 12927 }, { "epoch": 0.236313451660665, "grad_norm": 7.600987729347651, "learning_rate": 8.924869149004054e-06, "loss": 18.2163, "step": 12928 }, { "epoch": 0.2363317308571115, "grad_norm": 5.9369767914865115, "learning_rate": 8.924685754416647e-06, "loss": 17.4254, "step": 12929 }, { "epoch": 0.23635001005355805, "grad_norm": 7.368030724782585, "learning_rate": 8.924502346073557e-06, "loss": 17.8777, "step": 12930 }, { "epoch": 0.23636828925000458, "grad_norm": 6.516669152894072, "learning_rate": 8.924318923975427e-06, "loss": 17.4103, "step": 12931 }, { "epoch": 0.2363865684464511, "grad_norm": 7.678784942366478, "learning_rate": 8.924135488122901e-06, "loss": 17.7098, "step": 12932 }, { "epoch": 0.23640484764289763, "grad_norm": 7.832311221067831, "learning_rate": 8.923952038516618e-06, "loss": 18.3829, "step": 12933 }, { "epoch": 0.23642312683934413, "grad_norm": 6.78578352786296, "learning_rate": 8.923768575157225e-06, "loss": 17.9866, "step": 12934 }, { "epoch": 0.23644140603579067, "grad_norm": 8.651035737802951, "learning_rate": 8.923585098045362e-06, "loss": 17.7727, "step": 12935 }, { "epoch": 0.2364596852322372, "grad_norm": 13.048462594403414, "learning_rate": 8.923401607181676e-06, "loss": 17.1627, "step": 12936 }, { "epoch": 0.2364779644286837, "grad_norm": 5.5618522535398345, "learning_rate": 8.923218102566807e-06, "loss": 17.355, "step": 12937 }, { "epoch": 0.23649624362513025, "grad_norm": 5.53513557869465, "learning_rate": 8.923034584201399e-06, "loss": 17.1682, "step": 12938 }, { "epoch": 0.23651452282157676, "grad_norm": 6.201165463046301, "learning_rate": 8.922851052086095e-06, "loss": 17.4474, "step": 12939 }, { "epoch": 0.2365328020180233, "grad_norm": 6.598837901107383, "learning_rate": 8.922667506221538e-06, "loss": 17.4188, "step": 12940 }, { "epoch": 0.2365510812144698, "grad_norm": 8.209426477597665, "learning_rate": 8.922483946608373e-06, "loss": 17.8844, "step": 12941 }, { "epoch": 0.23656936041091634, "grad_norm": 6.694747798026469, "learning_rate": 8.922300373247243e-06, "loss": 17.669, "step": 12942 }, { "epoch": 0.23658763960736287, "grad_norm": 8.050533141119972, "learning_rate": 8.922116786138787e-06, "loss": 18.3496, "step": 12943 }, { "epoch": 0.23660591880380938, "grad_norm": 8.172578495070955, "learning_rate": 8.921933185283655e-06, "loss": 18.6932, "step": 12944 }, { "epoch": 0.23662419800025591, "grad_norm": 6.267768583441307, "learning_rate": 8.921749570682487e-06, "loss": 17.3662, "step": 12945 }, { "epoch": 0.23664247719670242, "grad_norm": 6.258043866978442, "learning_rate": 8.921565942335926e-06, "loss": 17.7663, "step": 12946 }, { "epoch": 0.23666075639314896, "grad_norm": 8.38711526934771, "learning_rate": 8.92138230024462e-06, "loss": 18.1128, "step": 12947 }, { "epoch": 0.2366790355895955, "grad_norm": 6.5203555330921095, "learning_rate": 8.921198644409205e-06, "loss": 17.3761, "step": 12948 }, { "epoch": 0.236697314786042, "grad_norm": 7.421198378402833, "learning_rate": 8.92101497483033e-06, "loss": 17.9431, "step": 12949 }, { "epoch": 0.23671559398248854, "grad_norm": 7.692947297720974, "learning_rate": 8.92083129150864e-06, "loss": 17.9351, "step": 12950 }, { "epoch": 0.23673387317893504, "grad_norm": 6.8739816391982735, "learning_rate": 8.920647594444774e-06, "loss": 17.3881, "step": 12951 }, { "epoch": 0.23675215237538158, "grad_norm": 10.080419192852398, "learning_rate": 8.92046388363938e-06, "loss": 17.6047, "step": 12952 }, { "epoch": 0.23677043157182812, "grad_norm": 5.3604729480078825, "learning_rate": 8.9202801590931e-06, "loss": 17.0675, "step": 12953 }, { "epoch": 0.23678871076827462, "grad_norm": 8.015903862743421, "learning_rate": 8.920096420806578e-06, "loss": 17.9858, "step": 12954 }, { "epoch": 0.23680698996472116, "grad_norm": 7.491044387526426, "learning_rate": 8.919912668780458e-06, "loss": 17.679, "step": 12955 }, { "epoch": 0.23682526916116767, "grad_norm": 8.120830097300725, "learning_rate": 8.919728903015383e-06, "loss": 18.2456, "step": 12956 }, { "epoch": 0.2368435483576142, "grad_norm": 6.5733303166914725, "learning_rate": 8.919545123512001e-06, "loss": 17.4546, "step": 12957 }, { "epoch": 0.2368618275540607, "grad_norm": 7.039356996593743, "learning_rate": 8.919361330270953e-06, "loss": 17.6232, "step": 12958 }, { "epoch": 0.23688010675050725, "grad_norm": 5.745232514859346, "learning_rate": 8.919177523292882e-06, "loss": 17.0815, "step": 12959 }, { "epoch": 0.23689838594695378, "grad_norm": 7.6415996339346, "learning_rate": 8.918993702578435e-06, "loss": 17.7463, "step": 12960 }, { "epoch": 0.2369166651434003, "grad_norm": 6.798791473928604, "learning_rate": 8.918809868128255e-06, "loss": 17.4995, "step": 12961 }, { "epoch": 0.23693494433984683, "grad_norm": 7.3922512658335915, "learning_rate": 8.918626019942987e-06, "loss": 17.8245, "step": 12962 }, { "epoch": 0.23695322353629333, "grad_norm": 6.017111653920449, "learning_rate": 8.918442158023272e-06, "loss": 17.3875, "step": 12963 }, { "epoch": 0.23697150273273987, "grad_norm": 6.253744335544148, "learning_rate": 8.91825828236976e-06, "loss": 17.5898, "step": 12964 }, { "epoch": 0.2369897819291864, "grad_norm": 5.89477590318735, "learning_rate": 8.918074392983093e-06, "loss": 17.1829, "step": 12965 }, { "epoch": 0.2370080611256329, "grad_norm": 6.316536465879392, "learning_rate": 8.917890489863915e-06, "loss": 17.3599, "step": 12966 }, { "epoch": 0.23702634032207945, "grad_norm": 6.1109856005017855, "learning_rate": 8.91770657301287e-06, "loss": 17.5658, "step": 12967 }, { "epoch": 0.23704461951852596, "grad_norm": 6.237782407754986, "learning_rate": 8.917522642430603e-06, "loss": 17.3666, "step": 12968 }, { "epoch": 0.2370628987149725, "grad_norm": 5.6297601624356215, "learning_rate": 8.91733869811776e-06, "loss": 17.3055, "step": 12969 }, { "epoch": 0.23708117791141903, "grad_norm": 6.639135237707821, "learning_rate": 8.917154740074984e-06, "loss": 17.4063, "step": 12970 }, { "epoch": 0.23709945710786554, "grad_norm": 9.328502384457716, "learning_rate": 8.916970768302921e-06, "loss": 18.093, "step": 12971 }, { "epoch": 0.23711773630431207, "grad_norm": 8.167995637422464, "learning_rate": 8.916786782802216e-06, "loss": 17.8628, "step": 12972 }, { "epoch": 0.23713601550075858, "grad_norm": 6.9413891111221, "learning_rate": 8.916602783573514e-06, "loss": 17.4451, "step": 12973 }, { "epoch": 0.23715429469720511, "grad_norm": 5.304652642268245, "learning_rate": 8.916418770617457e-06, "loss": 17.0545, "step": 12974 }, { "epoch": 0.23717257389365162, "grad_norm": 5.701778711333664, "learning_rate": 8.916234743934693e-06, "loss": 17.1716, "step": 12975 }, { "epoch": 0.23719085309009816, "grad_norm": 5.964328240908642, "learning_rate": 8.916050703525867e-06, "loss": 17.4856, "step": 12976 }, { "epoch": 0.2372091322865447, "grad_norm": 7.325778422518426, "learning_rate": 8.91586664939162e-06, "loss": 18.1201, "step": 12977 }, { "epoch": 0.2372274114829912, "grad_norm": 7.438311462390998, "learning_rate": 8.915682581532604e-06, "loss": 17.6801, "step": 12978 }, { "epoch": 0.23724569067943774, "grad_norm": 7.920146329375921, "learning_rate": 8.915498499949458e-06, "loss": 17.7486, "step": 12979 }, { "epoch": 0.23726396987588425, "grad_norm": 6.617229114538101, "learning_rate": 8.91531440464283e-06, "loss": 17.3874, "step": 12980 }, { "epoch": 0.23728224907233078, "grad_norm": 13.694352843739813, "learning_rate": 8.915130295613364e-06, "loss": 17.8205, "step": 12981 }, { "epoch": 0.23730052826877732, "grad_norm": 7.00801840795046, "learning_rate": 8.914946172861707e-06, "loss": 17.4743, "step": 12982 }, { "epoch": 0.23731880746522382, "grad_norm": 6.043636742624499, "learning_rate": 8.914762036388504e-06, "loss": 17.3542, "step": 12983 }, { "epoch": 0.23733708666167036, "grad_norm": 6.713773364585318, "learning_rate": 8.914577886194399e-06, "loss": 17.7853, "step": 12984 }, { "epoch": 0.23735536585811687, "grad_norm": 8.468798535179605, "learning_rate": 8.914393722280039e-06, "loss": 17.895, "step": 12985 }, { "epoch": 0.2373736450545634, "grad_norm": 6.485210633111785, "learning_rate": 8.914209544646066e-06, "loss": 17.6248, "step": 12986 }, { "epoch": 0.23739192425100994, "grad_norm": 6.579365789610776, "learning_rate": 8.914025353293132e-06, "loss": 17.8761, "step": 12987 }, { "epoch": 0.23741020344745645, "grad_norm": 6.270679818890554, "learning_rate": 8.913841148221875e-06, "loss": 17.2867, "step": 12988 }, { "epoch": 0.23742848264390298, "grad_norm": 7.067963757021928, "learning_rate": 8.913656929432948e-06, "loss": 17.7708, "step": 12989 }, { "epoch": 0.2374467618403495, "grad_norm": 6.913535836401947, "learning_rate": 8.91347269692699e-06, "loss": 17.6606, "step": 12990 }, { "epoch": 0.23746504103679603, "grad_norm": 8.262615181485657, "learning_rate": 8.913288450704653e-06, "loss": 18.3434, "step": 12991 }, { "epoch": 0.23748332023324253, "grad_norm": 6.780892302162567, "learning_rate": 8.913104190766577e-06, "loss": 17.4476, "step": 12992 }, { "epoch": 0.23750159942968907, "grad_norm": 5.48441401148886, "learning_rate": 8.912919917113412e-06, "loss": 17.1764, "step": 12993 }, { "epoch": 0.2375198786261356, "grad_norm": 8.026787375146164, "learning_rate": 8.9127356297458e-06, "loss": 18.0069, "step": 12994 }, { "epoch": 0.2375381578225821, "grad_norm": 7.492718384645257, "learning_rate": 8.912551328664392e-06, "loss": 18.2687, "step": 12995 }, { "epoch": 0.23755643701902865, "grad_norm": 7.000165263766671, "learning_rate": 8.91236701386983e-06, "loss": 17.9321, "step": 12996 }, { "epoch": 0.23757471621547516, "grad_norm": 7.877236264654493, "learning_rate": 8.91218268536276e-06, "loss": 17.5602, "step": 12997 }, { "epoch": 0.2375929954119217, "grad_norm": 6.946470021963183, "learning_rate": 8.91199834314383e-06, "loss": 17.5178, "step": 12998 }, { "epoch": 0.23761127460836823, "grad_norm": 5.90998841843763, "learning_rate": 8.911813987213685e-06, "loss": 17.1298, "step": 12999 }, { "epoch": 0.23762955380481474, "grad_norm": 6.551602095948039, "learning_rate": 8.911629617572971e-06, "loss": 17.5774, "step": 13000 }, { "epoch": 0.23764783300126127, "grad_norm": 5.662640598811488, "learning_rate": 8.911445234222335e-06, "loss": 17.0583, "step": 13001 }, { "epoch": 0.23766611219770778, "grad_norm": 7.234619571718053, "learning_rate": 8.911260837162423e-06, "loss": 18.1144, "step": 13002 }, { "epoch": 0.23768439139415432, "grad_norm": 6.715066588459917, "learning_rate": 8.911076426393881e-06, "loss": 17.698, "step": 13003 }, { "epoch": 0.23770267059060085, "grad_norm": 6.230777428028448, "learning_rate": 8.910892001917357e-06, "loss": 17.3361, "step": 13004 }, { "epoch": 0.23772094978704736, "grad_norm": 6.988059654715737, "learning_rate": 8.910707563733495e-06, "loss": 17.3595, "step": 13005 }, { "epoch": 0.2377392289834939, "grad_norm": 7.159736432205771, "learning_rate": 8.910523111842942e-06, "loss": 17.7156, "step": 13006 }, { "epoch": 0.2377575081799404, "grad_norm": 7.3308498510596225, "learning_rate": 8.910338646246344e-06, "loss": 17.9566, "step": 13007 }, { "epoch": 0.23777578737638694, "grad_norm": 7.0589468435521825, "learning_rate": 8.910154166944348e-06, "loss": 17.6011, "step": 13008 }, { "epoch": 0.23779406657283345, "grad_norm": 6.9181011176341585, "learning_rate": 8.909969673937603e-06, "loss": 17.7502, "step": 13009 }, { "epoch": 0.23781234576927998, "grad_norm": 6.4022308663041985, "learning_rate": 8.909785167226755e-06, "loss": 17.4139, "step": 13010 }, { "epoch": 0.23783062496572652, "grad_norm": 6.066161465753598, "learning_rate": 8.909600646812446e-06, "loss": 17.4185, "step": 13011 }, { "epoch": 0.23784890416217302, "grad_norm": 8.197393433135085, "learning_rate": 8.909416112695327e-06, "loss": 18.1142, "step": 13012 }, { "epoch": 0.23786718335861956, "grad_norm": 6.369683808467332, "learning_rate": 8.909231564876045e-06, "loss": 17.5209, "step": 13013 }, { "epoch": 0.23788546255506607, "grad_norm": 7.031029507963712, "learning_rate": 8.909047003355244e-06, "loss": 17.7567, "step": 13014 }, { "epoch": 0.2379037417515126, "grad_norm": 7.37520805755393, "learning_rate": 8.90886242813357e-06, "loss": 18.1819, "step": 13015 }, { "epoch": 0.23792202094795914, "grad_norm": 6.454543253959291, "learning_rate": 8.908677839211677e-06, "loss": 17.4121, "step": 13016 }, { "epoch": 0.23794030014440565, "grad_norm": 7.113403509963748, "learning_rate": 8.908493236590206e-06, "loss": 17.695, "step": 13017 }, { "epoch": 0.23795857934085218, "grad_norm": 5.791101109650206, "learning_rate": 8.908308620269806e-06, "loss": 17.2096, "step": 13018 }, { "epoch": 0.2379768585372987, "grad_norm": 6.6434223453450105, "learning_rate": 8.908123990251121e-06, "loss": 17.7579, "step": 13019 }, { "epoch": 0.23799513773374523, "grad_norm": 5.8890298099976075, "learning_rate": 8.907939346534802e-06, "loss": 17.3712, "step": 13020 }, { "epoch": 0.23801341693019176, "grad_norm": 6.076916740522336, "learning_rate": 8.907754689121495e-06, "loss": 17.2601, "step": 13021 }, { "epoch": 0.23803169612663827, "grad_norm": 6.499479833887206, "learning_rate": 8.907570018011846e-06, "loss": 17.448, "step": 13022 }, { "epoch": 0.2380499753230848, "grad_norm": 6.908927066972406, "learning_rate": 8.907385333206505e-06, "loss": 17.8089, "step": 13023 }, { "epoch": 0.2380682545195313, "grad_norm": 9.149487012087734, "learning_rate": 8.907200634706116e-06, "loss": 18.5294, "step": 13024 }, { "epoch": 0.23808653371597785, "grad_norm": 7.6501435084001175, "learning_rate": 8.907015922511329e-06, "loss": 18.1171, "step": 13025 }, { "epoch": 0.23810481291242436, "grad_norm": 6.763376327046655, "learning_rate": 8.906831196622787e-06, "loss": 17.5626, "step": 13026 }, { "epoch": 0.2381230921088709, "grad_norm": 7.702371224133755, "learning_rate": 8.906646457041144e-06, "loss": 17.7776, "step": 13027 }, { "epoch": 0.23814137130531743, "grad_norm": 7.977697597983092, "learning_rate": 8.906461703767043e-06, "loss": 18.0928, "step": 13028 }, { "epoch": 0.23815965050176394, "grad_norm": 5.379696040297563, "learning_rate": 8.906276936801132e-06, "loss": 17.231, "step": 13029 }, { "epoch": 0.23817792969821047, "grad_norm": 6.577581666634239, "learning_rate": 8.906092156144062e-06, "loss": 17.7253, "step": 13030 }, { "epoch": 0.23819620889465698, "grad_norm": 8.389811716463779, "learning_rate": 8.905907361796476e-06, "loss": 17.9966, "step": 13031 }, { "epoch": 0.23821448809110352, "grad_norm": 6.530808002128086, "learning_rate": 8.905722553759023e-06, "loss": 17.6125, "step": 13032 }, { "epoch": 0.23823276728755005, "grad_norm": 6.515670033066622, "learning_rate": 8.905537732032352e-06, "loss": 17.6081, "step": 13033 }, { "epoch": 0.23825104648399656, "grad_norm": 5.760940434326444, "learning_rate": 8.90535289661711e-06, "loss": 17.2928, "step": 13034 }, { "epoch": 0.2382693256804431, "grad_norm": 8.140661054656807, "learning_rate": 8.905168047513945e-06, "loss": 18.464, "step": 13035 }, { "epoch": 0.2382876048768896, "grad_norm": 5.78604602671709, "learning_rate": 8.904983184723505e-06, "loss": 17.4396, "step": 13036 }, { "epoch": 0.23830588407333614, "grad_norm": 7.311959534577604, "learning_rate": 8.90479830824644e-06, "loss": 17.2399, "step": 13037 }, { "epoch": 0.23832416326978267, "grad_norm": 7.868191176366132, "learning_rate": 8.904613418083393e-06, "loss": 17.9482, "step": 13038 }, { "epoch": 0.23834244246622918, "grad_norm": 7.234337688278188, "learning_rate": 8.904428514235016e-06, "loss": 17.7567, "step": 13039 }, { "epoch": 0.23836072166267572, "grad_norm": 6.489097226413424, "learning_rate": 8.904243596701954e-06, "loss": 17.5105, "step": 13040 }, { "epoch": 0.23837900085912223, "grad_norm": 5.794715028733325, "learning_rate": 8.904058665484859e-06, "loss": 17.4734, "step": 13041 }, { "epoch": 0.23839728005556876, "grad_norm": 6.8670725460927295, "learning_rate": 8.903873720584376e-06, "loss": 17.6704, "step": 13042 }, { "epoch": 0.23841555925201527, "grad_norm": 6.4415121819820165, "learning_rate": 8.903688762001154e-06, "loss": 17.5562, "step": 13043 }, { "epoch": 0.2384338384484618, "grad_norm": 7.108884199427061, "learning_rate": 8.903503789735843e-06, "loss": 17.8931, "step": 13044 }, { "epoch": 0.23845211764490834, "grad_norm": 7.446173452101878, "learning_rate": 8.903318803789089e-06, "loss": 18.1885, "step": 13045 }, { "epoch": 0.23847039684135485, "grad_norm": 7.67893773097202, "learning_rate": 8.903133804161543e-06, "loss": 17.9477, "step": 13046 }, { "epoch": 0.23848867603780138, "grad_norm": 6.6717209776126865, "learning_rate": 8.90294879085385e-06, "loss": 17.8488, "step": 13047 }, { "epoch": 0.2385069552342479, "grad_norm": 6.984363402847883, "learning_rate": 8.902763763866661e-06, "loss": 17.969, "step": 13048 }, { "epoch": 0.23852523443069443, "grad_norm": 5.666023704553353, "learning_rate": 8.902578723200623e-06, "loss": 17.2028, "step": 13049 }, { "epoch": 0.23854351362714096, "grad_norm": 6.75865032362205, "learning_rate": 8.902393668856386e-06, "loss": 17.4377, "step": 13050 }, { "epoch": 0.23856179282358747, "grad_norm": 9.262343010243793, "learning_rate": 8.902208600834596e-06, "loss": 18.5766, "step": 13051 }, { "epoch": 0.238580072020034, "grad_norm": 7.539999763369353, "learning_rate": 8.902023519135906e-06, "loss": 17.9697, "step": 13052 }, { "epoch": 0.23859835121648051, "grad_norm": 6.9925561920139385, "learning_rate": 8.901838423760962e-06, "loss": 17.8635, "step": 13053 }, { "epoch": 0.23861663041292705, "grad_norm": 9.722389640327213, "learning_rate": 8.90165331471041e-06, "loss": 17.665, "step": 13054 }, { "epoch": 0.23863490960937359, "grad_norm": 7.012750000220342, "learning_rate": 8.901468191984905e-06, "loss": 17.9913, "step": 13055 }, { "epoch": 0.2386531888058201, "grad_norm": 6.897659451433708, "learning_rate": 8.901283055585091e-06, "loss": 18.0401, "step": 13056 }, { "epoch": 0.23867146800226663, "grad_norm": 5.730732391719401, "learning_rate": 8.90109790551162e-06, "loss": 17.2895, "step": 13057 }, { "epoch": 0.23868974719871314, "grad_norm": 8.528304224384835, "learning_rate": 8.900912741765137e-06, "loss": 19.029, "step": 13058 }, { "epoch": 0.23870802639515967, "grad_norm": 7.6515832604941565, "learning_rate": 8.900727564346294e-06, "loss": 18.3438, "step": 13059 }, { "epoch": 0.23872630559160618, "grad_norm": 6.588949651884337, "learning_rate": 8.90054237325574e-06, "loss": 17.5206, "step": 13060 }, { "epoch": 0.23874458478805272, "grad_norm": 5.966831394317411, "learning_rate": 8.900357168494123e-06, "loss": 17.5629, "step": 13061 }, { "epoch": 0.23876286398449925, "grad_norm": 5.853515766728405, "learning_rate": 8.900171950062092e-06, "loss": 17.0216, "step": 13062 }, { "epoch": 0.23878114318094576, "grad_norm": 5.879637566081577, "learning_rate": 8.899986717960298e-06, "loss": 17.4537, "step": 13063 }, { "epoch": 0.2387994223773923, "grad_norm": 6.965208279550614, "learning_rate": 8.899801472189389e-06, "loss": 17.9539, "step": 13064 }, { "epoch": 0.2388177015738388, "grad_norm": 6.346859650529671, "learning_rate": 8.899616212750013e-06, "loss": 17.3691, "step": 13065 }, { "epoch": 0.23883598077028534, "grad_norm": 10.205520720902255, "learning_rate": 8.899430939642822e-06, "loss": 17.953, "step": 13066 }, { "epoch": 0.23885425996673187, "grad_norm": 6.291004518877176, "learning_rate": 8.899245652868461e-06, "loss": 17.7778, "step": 13067 }, { "epoch": 0.23887253916317838, "grad_norm": 7.281603008158021, "learning_rate": 8.899060352427587e-06, "loss": 17.8255, "step": 13068 }, { "epoch": 0.23889081835962492, "grad_norm": 7.89320208932405, "learning_rate": 8.898875038320842e-06, "loss": 17.6524, "step": 13069 }, { "epoch": 0.23890909755607143, "grad_norm": 6.083023775630701, "learning_rate": 8.898689710548878e-06, "loss": 17.5337, "step": 13070 }, { "epoch": 0.23892737675251796, "grad_norm": 6.442767289991403, "learning_rate": 8.898504369112346e-06, "loss": 17.5752, "step": 13071 }, { "epoch": 0.2389456559489645, "grad_norm": 5.926364232680868, "learning_rate": 8.898319014011893e-06, "loss": 17.4152, "step": 13072 }, { "epoch": 0.238963935145411, "grad_norm": 5.853578651618931, "learning_rate": 8.898133645248172e-06, "loss": 17.361, "step": 13073 }, { "epoch": 0.23898221434185754, "grad_norm": 8.037353131529064, "learning_rate": 8.89794826282183e-06, "loss": 18.3901, "step": 13074 }, { "epoch": 0.23900049353830405, "grad_norm": 6.839033028328603, "learning_rate": 8.897762866733516e-06, "loss": 17.7177, "step": 13075 }, { "epoch": 0.23901877273475058, "grad_norm": 6.458383770802645, "learning_rate": 8.897577456983884e-06, "loss": 17.5274, "step": 13076 }, { "epoch": 0.2390370519311971, "grad_norm": 6.186196520678368, "learning_rate": 8.89739203357358e-06, "loss": 17.5473, "step": 13077 }, { "epoch": 0.23905533112764363, "grad_norm": 6.113652323107251, "learning_rate": 8.897206596503256e-06, "loss": 17.2725, "step": 13078 }, { "epoch": 0.23907361032409016, "grad_norm": 7.155653850176197, "learning_rate": 8.89702114577356e-06, "loss": 17.838, "step": 13079 }, { "epoch": 0.23909188952053667, "grad_norm": 6.4961595382647825, "learning_rate": 8.896835681385143e-06, "loss": 17.5084, "step": 13080 }, { "epoch": 0.2391101687169832, "grad_norm": 5.803814602285096, "learning_rate": 8.896650203338655e-06, "loss": 17.1967, "step": 13081 }, { "epoch": 0.23912844791342971, "grad_norm": 6.125421174580098, "learning_rate": 8.896464711634748e-06, "loss": 17.5201, "step": 13082 }, { "epoch": 0.23914672710987625, "grad_norm": 7.170635367768776, "learning_rate": 8.896279206274069e-06, "loss": 17.9011, "step": 13083 }, { "epoch": 0.23916500630632279, "grad_norm": 7.9103584189316765, "learning_rate": 8.89609368725727e-06, "loss": 17.7902, "step": 13084 }, { "epoch": 0.2391832855027693, "grad_norm": 5.583691209122781, "learning_rate": 8.895908154585e-06, "loss": 17.2785, "step": 13085 }, { "epoch": 0.23920156469921583, "grad_norm": 6.787952200758381, "learning_rate": 8.895722608257909e-06, "loss": 17.717, "step": 13086 }, { "epoch": 0.23921984389566234, "grad_norm": 6.404965713753553, "learning_rate": 8.89553704827665e-06, "loss": 17.1054, "step": 13087 }, { "epoch": 0.23923812309210887, "grad_norm": 6.5079624006987205, "learning_rate": 8.895351474641872e-06, "loss": 17.4883, "step": 13088 }, { "epoch": 0.2392564022885554, "grad_norm": 6.930271806626031, "learning_rate": 8.895165887354225e-06, "loss": 17.6584, "step": 13089 }, { "epoch": 0.23927468148500192, "grad_norm": 7.241331026542094, "learning_rate": 8.894980286414358e-06, "loss": 17.7407, "step": 13090 }, { "epoch": 0.23929296068144845, "grad_norm": 8.199361623072374, "learning_rate": 8.894794671822924e-06, "loss": 18.387, "step": 13091 }, { "epoch": 0.23931123987789496, "grad_norm": 6.145782542820028, "learning_rate": 8.894609043580573e-06, "loss": 17.5219, "step": 13092 }, { "epoch": 0.2393295190743415, "grad_norm": 5.964790287197087, "learning_rate": 8.894423401687954e-06, "loss": 17.2074, "step": 13093 }, { "epoch": 0.239347798270788, "grad_norm": 7.531933580267063, "learning_rate": 8.894237746145719e-06, "loss": 18.2056, "step": 13094 }, { "epoch": 0.23936607746723454, "grad_norm": 6.628781494873603, "learning_rate": 8.894052076954521e-06, "loss": 17.7886, "step": 13095 }, { "epoch": 0.23938435666368107, "grad_norm": 7.072780596815424, "learning_rate": 8.893866394115006e-06, "loss": 17.6488, "step": 13096 }, { "epoch": 0.23940263586012758, "grad_norm": 7.582250839989128, "learning_rate": 8.893680697627829e-06, "loss": 17.9527, "step": 13097 }, { "epoch": 0.23942091505657412, "grad_norm": 8.00874576216099, "learning_rate": 8.893494987493637e-06, "loss": 18.2301, "step": 13098 }, { "epoch": 0.23943919425302063, "grad_norm": 6.624719063862705, "learning_rate": 8.893309263713084e-06, "loss": 17.5278, "step": 13099 }, { "epoch": 0.23945747344946716, "grad_norm": 6.073831096744187, "learning_rate": 8.89312352628682e-06, "loss": 17.3268, "step": 13100 }, { "epoch": 0.2394757526459137, "grad_norm": 5.6543958726883945, "learning_rate": 8.892937775215496e-06, "loss": 17.3742, "step": 13101 }, { "epoch": 0.2394940318423602, "grad_norm": 6.598721554417625, "learning_rate": 8.892752010499762e-06, "loss": 17.414, "step": 13102 }, { "epoch": 0.23951231103880674, "grad_norm": 8.802692124687134, "learning_rate": 8.892566232140271e-06, "loss": 18.4354, "step": 13103 }, { "epoch": 0.23953059023525325, "grad_norm": 7.377216148339256, "learning_rate": 8.892380440137674e-06, "loss": 17.6755, "step": 13104 }, { "epoch": 0.23954886943169978, "grad_norm": 6.042696197432081, "learning_rate": 8.892194634492619e-06, "loss": 17.3203, "step": 13105 }, { "epoch": 0.23956714862814632, "grad_norm": 7.389500299627137, "learning_rate": 8.89200881520576e-06, "loss": 18.1021, "step": 13106 }, { "epoch": 0.23958542782459283, "grad_norm": 6.678872073304817, "learning_rate": 8.89182298227775e-06, "loss": 17.6422, "step": 13107 }, { "epoch": 0.23960370702103936, "grad_norm": 6.928150862474782, "learning_rate": 8.891637135709236e-06, "loss": 17.8863, "step": 13108 }, { "epoch": 0.23962198621748587, "grad_norm": 7.159367597370521, "learning_rate": 8.891451275500872e-06, "loss": 17.8805, "step": 13109 }, { "epoch": 0.2396402654139324, "grad_norm": 5.940165891535454, "learning_rate": 8.891265401653308e-06, "loss": 17.1754, "step": 13110 }, { "epoch": 0.23965854461037892, "grad_norm": 8.137977278209508, "learning_rate": 8.891079514167198e-06, "loss": 18.3722, "step": 13111 }, { "epoch": 0.23967682380682545, "grad_norm": 6.995921613837327, "learning_rate": 8.890893613043191e-06, "loss": 17.7443, "step": 13112 }, { "epoch": 0.239695103003272, "grad_norm": 6.8188755108798995, "learning_rate": 8.890707698281941e-06, "loss": 17.7557, "step": 13113 }, { "epoch": 0.2397133821997185, "grad_norm": 7.630031424376271, "learning_rate": 8.890521769884097e-06, "loss": 18.0336, "step": 13114 }, { "epoch": 0.23973166139616503, "grad_norm": 6.747272391453249, "learning_rate": 8.890335827850312e-06, "loss": 17.648, "step": 13115 }, { "epoch": 0.23974994059261154, "grad_norm": 5.524067398388494, "learning_rate": 8.890149872181237e-06, "loss": 17.2013, "step": 13116 }, { "epoch": 0.23976821978905807, "grad_norm": 7.001019747305786, "learning_rate": 8.889963902877525e-06, "loss": 17.6696, "step": 13117 }, { "epoch": 0.2397864989855046, "grad_norm": 5.892740280706896, "learning_rate": 8.889777919939827e-06, "loss": 17.3016, "step": 13118 }, { "epoch": 0.23980477818195112, "grad_norm": 6.4090602622592465, "learning_rate": 8.889591923368794e-06, "loss": 17.3847, "step": 13119 }, { "epoch": 0.23982305737839765, "grad_norm": 6.843988185794902, "learning_rate": 8.88940591316508e-06, "loss": 17.6176, "step": 13120 }, { "epoch": 0.23984133657484416, "grad_norm": 7.653232303178483, "learning_rate": 8.889219889329337e-06, "loss": 17.6849, "step": 13121 }, { "epoch": 0.2398596157712907, "grad_norm": 6.222701452795798, "learning_rate": 8.889033851862213e-06, "loss": 17.0545, "step": 13122 }, { "epoch": 0.23987789496773723, "grad_norm": 6.872948127642796, "learning_rate": 8.888847800764364e-06, "loss": 17.8578, "step": 13123 }, { "epoch": 0.23989617416418374, "grad_norm": 6.182030799781346, "learning_rate": 8.888661736036442e-06, "loss": 17.5718, "step": 13124 }, { "epoch": 0.23991445336063028, "grad_norm": 8.496963706036816, "learning_rate": 8.888475657679096e-06, "loss": 18.1289, "step": 13125 }, { "epoch": 0.23993273255707678, "grad_norm": 6.461940796817892, "learning_rate": 8.88828956569298e-06, "loss": 17.4589, "step": 13126 }, { "epoch": 0.23995101175352332, "grad_norm": 7.833861150936122, "learning_rate": 8.88810346007875e-06, "loss": 17.9519, "step": 13127 }, { "epoch": 0.23996929094996983, "grad_norm": 7.524091408967035, "learning_rate": 8.88791734083705e-06, "loss": 18.0065, "step": 13128 }, { "epoch": 0.23998757014641636, "grad_norm": 7.119184391647039, "learning_rate": 8.887731207968541e-06, "loss": 17.6692, "step": 13129 }, { "epoch": 0.2400058493428629, "grad_norm": 7.258629108754563, "learning_rate": 8.88754506147387e-06, "loss": 18.0561, "step": 13130 }, { "epoch": 0.2400241285393094, "grad_norm": 6.744296533148103, "learning_rate": 8.887358901353691e-06, "loss": 17.7762, "step": 13131 }, { "epoch": 0.24004240773575594, "grad_norm": 6.343742431230002, "learning_rate": 8.887172727608656e-06, "loss": 17.8432, "step": 13132 }, { "epoch": 0.24006068693220245, "grad_norm": 7.908862263357061, "learning_rate": 8.886986540239418e-06, "loss": 17.9585, "step": 13133 }, { "epoch": 0.24007896612864899, "grad_norm": 6.879068165449237, "learning_rate": 8.88680033924663e-06, "loss": 18.0544, "step": 13134 }, { "epoch": 0.24009724532509552, "grad_norm": 6.480055404882929, "learning_rate": 8.886614124630944e-06, "loss": 17.5558, "step": 13135 }, { "epoch": 0.24011552452154203, "grad_norm": 7.085668774874577, "learning_rate": 8.886427896393014e-06, "loss": 17.843, "step": 13136 }, { "epoch": 0.24013380371798856, "grad_norm": 6.890374086423882, "learning_rate": 8.886241654533489e-06, "loss": 17.7323, "step": 13137 }, { "epoch": 0.24015208291443507, "grad_norm": 5.987956008528032, "learning_rate": 8.886055399053023e-06, "loss": 17.2639, "step": 13138 }, { "epoch": 0.2401703621108816, "grad_norm": 6.744770214996133, "learning_rate": 8.885869129952273e-06, "loss": 17.5546, "step": 13139 }, { "epoch": 0.24018864130732814, "grad_norm": 6.60720983685564, "learning_rate": 8.88568284723189e-06, "loss": 17.6936, "step": 13140 }, { "epoch": 0.24020692050377465, "grad_norm": 7.8990858114798845, "learning_rate": 8.885496550892523e-06, "loss": 18.0575, "step": 13141 }, { "epoch": 0.2402251997002212, "grad_norm": 7.336673911300776, "learning_rate": 8.885310240934829e-06, "loss": 17.7049, "step": 13142 }, { "epoch": 0.2402434788966677, "grad_norm": 7.110353962766776, "learning_rate": 8.885123917359459e-06, "loss": 17.8205, "step": 13143 }, { "epoch": 0.24026175809311423, "grad_norm": 7.2817624107343555, "learning_rate": 8.884937580167069e-06, "loss": 18.0881, "step": 13144 }, { "epoch": 0.24028003728956074, "grad_norm": 6.346453683807578, "learning_rate": 8.884751229358307e-06, "loss": 17.5181, "step": 13145 }, { "epoch": 0.24029831648600727, "grad_norm": 9.293193205765766, "learning_rate": 8.884564864933831e-06, "loss": 18.3708, "step": 13146 }, { "epoch": 0.2403165956824538, "grad_norm": 8.133346329073769, "learning_rate": 8.884378486894292e-06, "loss": 18.2355, "step": 13147 }, { "epoch": 0.24033487487890032, "grad_norm": 7.132364080017355, "learning_rate": 8.884192095240342e-06, "loss": 17.7903, "step": 13148 }, { "epoch": 0.24035315407534685, "grad_norm": 8.38705214516052, "learning_rate": 8.884005689972638e-06, "loss": 18.0754, "step": 13149 }, { "epoch": 0.24037143327179336, "grad_norm": 6.907929538270149, "learning_rate": 8.883819271091829e-06, "loss": 17.922, "step": 13150 }, { "epoch": 0.2403897124682399, "grad_norm": 6.488311848475475, "learning_rate": 8.883632838598571e-06, "loss": 17.4053, "step": 13151 }, { "epoch": 0.24040799166468643, "grad_norm": 5.511859400811804, "learning_rate": 8.883446392493517e-06, "loss": 17.0804, "step": 13152 }, { "epoch": 0.24042627086113294, "grad_norm": 7.59345289195484, "learning_rate": 8.883259932777321e-06, "loss": 18.2424, "step": 13153 }, { "epoch": 0.24044455005757948, "grad_norm": 8.42928295970774, "learning_rate": 8.883073459450634e-06, "loss": 18.2357, "step": 13154 }, { "epoch": 0.24046282925402598, "grad_norm": 6.6186807129970635, "learning_rate": 8.882886972514115e-06, "loss": 17.8616, "step": 13155 }, { "epoch": 0.24048110845047252, "grad_norm": 5.684230765944608, "learning_rate": 8.88270047196841e-06, "loss": 16.9709, "step": 13156 }, { "epoch": 0.24049938764691906, "grad_norm": 7.766390927007206, "learning_rate": 8.882513957814181e-06, "loss": 18.3054, "step": 13157 }, { "epoch": 0.24051766684336556, "grad_norm": 8.166219961211254, "learning_rate": 8.882327430052073e-06, "loss": 17.8959, "step": 13158 }, { "epoch": 0.2405359460398121, "grad_norm": 7.978006879651882, "learning_rate": 8.882140888682749e-06, "loss": 18.1603, "step": 13159 }, { "epoch": 0.2405542252362586, "grad_norm": 4.8592994633480835, "learning_rate": 8.881954333706854e-06, "loss": 16.9342, "step": 13160 }, { "epoch": 0.24057250443270514, "grad_norm": 6.951007142144269, "learning_rate": 8.88176776512505e-06, "loss": 17.8005, "step": 13161 }, { "epoch": 0.24059078362915165, "grad_norm": 6.92737058163528, "learning_rate": 8.881581182937983e-06, "loss": 17.4765, "step": 13162 }, { "epoch": 0.24060906282559819, "grad_norm": 6.316452115981913, "learning_rate": 8.881394587146313e-06, "loss": 17.4542, "step": 13163 }, { "epoch": 0.24062734202204472, "grad_norm": 6.695363778855367, "learning_rate": 8.88120797775069e-06, "loss": 17.319, "step": 13164 }, { "epoch": 0.24064562121849123, "grad_norm": 7.128956621944359, "learning_rate": 8.881021354751772e-06, "loss": 17.9394, "step": 13165 }, { "epoch": 0.24066390041493776, "grad_norm": 6.611792036140452, "learning_rate": 8.880834718150209e-06, "loss": 17.4905, "step": 13166 }, { "epoch": 0.24068217961138427, "grad_norm": 7.183528477062133, "learning_rate": 8.880648067946658e-06, "loss": 17.7129, "step": 13167 }, { "epoch": 0.2407004588078308, "grad_norm": 8.30278988075915, "learning_rate": 8.880461404141771e-06, "loss": 18.2827, "step": 13168 }, { "epoch": 0.24071873800427734, "grad_norm": 8.699138208964596, "learning_rate": 8.880274726736204e-06, "loss": 18.3182, "step": 13169 }, { "epoch": 0.24073701720072385, "grad_norm": 6.688449049955284, "learning_rate": 8.880088035730611e-06, "loss": 17.6063, "step": 13170 }, { "epoch": 0.2407552963971704, "grad_norm": 6.367619050050093, "learning_rate": 8.879901331125649e-06, "loss": 17.463, "step": 13171 }, { "epoch": 0.2407735755936169, "grad_norm": 4.951303404699288, "learning_rate": 8.879714612921966e-06, "loss": 16.8762, "step": 13172 }, { "epoch": 0.24079185479006343, "grad_norm": 7.467414625288263, "learning_rate": 8.879527881120222e-06, "loss": 18.2681, "step": 13173 }, { "epoch": 0.24081013398650997, "grad_norm": 7.872630041482053, "learning_rate": 8.879341135721067e-06, "loss": 18.0894, "step": 13174 }, { "epoch": 0.24082841318295647, "grad_norm": 7.730099311866059, "learning_rate": 8.87915437672516e-06, "loss": 17.6484, "step": 13175 }, { "epoch": 0.240846692379403, "grad_norm": 5.927060754664423, "learning_rate": 8.878967604133153e-06, "loss": 17.2848, "step": 13176 }, { "epoch": 0.24086497157584952, "grad_norm": 7.020997094443355, "learning_rate": 8.878780817945701e-06, "loss": 18.143, "step": 13177 }, { "epoch": 0.24088325077229605, "grad_norm": 5.337990125998545, "learning_rate": 8.87859401816346e-06, "loss": 17.156, "step": 13178 }, { "epoch": 0.24090152996874256, "grad_norm": 7.799283810345035, "learning_rate": 8.87840720478708e-06, "loss": 18.2052, "step": 13179 }, { "epoch": 0.2409198091651891, "grad_norm": 7.465493378785632, "learning_rate": 8.878220377817222e-06, "loss": 18.0285, "step": 13180 }, { "epoch": 0.24093808836163563, "grad_norm": 6.178996919767709, "learning_rate": 8.878033537254537e-06, "loss": 17.4663, "step": 13181 }, { "epoch": 0.24095636755808214, "grad_norm": 5.900119250643999, "learning_rate": 8.877846683099684e-06, "loss": 17.1177, "step": 13182 }, { "epoch": 0.24097464675452868, "grad_norm": 8.19533820450328, "learning_rate": 8.877659815353313e-06, "loss": 17.7, "step": 13183 }, { "epoch": 0.24099292595097518, "grad_norm": 6.52315300706894, "learning_rate": 8.87747293401608e-06, "loss": 17.7248, "step": 13184 }, { "epoch": 0.24101120514742172, "grad_norm": 6.058365530682624, "learning_rate": 8.877286039088642e-06, "loss": 17.0972, "step": 13185 }, { "epoch": 0.24102948434386826, "grad_norm": 7.314506776774982, "learning_rate": 8.87709913057165e-06, "loss": 17.7921, "step": 13186 }, { "epoch": 0.24104776354031476, "grad_norm": 6.62011695634416, "learning_rate": 8.876912208465765e-06, "loss": 17.7254, "step": 13187 }, { "epoch": 0.2410660427367613, "grad_norm": 7.241284419157164, "learning_rate": 8.876725272771639e-06, "loss": 17.8527, "step": 13188 }, { "epoch": 0.2410843219332078, "grad_norm": 6.002350376615827, "learning_rate": 8.876538323489925e-06, "loss": 17.4492, "step": 13189 }, { "epoch": 0.24110260112965434, "grad_norm": 6.5183420354195585, "learning_rate": 8.876351360621283e-06, "loss": 17.6424, "step": 13190 }, { "epoch": 0.24112088032610088, "grad_norm": 7.2924557115800726, "learning_rate": 8.876164384166365e-06, "loss": 17.5475, "step": 13191 }, { "epoch": 0.24113915952254739, "grad_norm": 6.838292540672922, "learning_rate": 8.875977394125828e-06, "loss": 17.7877, "step": 13192 }, { "epoch": 0.24115743871899392, "grad_norm": 7.474611917261855, "learning_rate": 8.875790390500325e-06, "loss": 17.638, "step": 13193 }, { "epoch": 0.24117571791544043, "grad_norm": 6.373800063567433, "learning_rate": 8.875603373290515e-06, "loss": 17.3922, "step": 13194 }, { "epoch": 0.24119399711188697, "grad_norm": 6.205982421908158, "learning_rate": 8.875416342497049e-06, "loss": 17.4431, "step": 13195 }, { "epoch": 0.24121227630833347, "grad_norm": 6.899377166810302, "learning_rate": 8.875229298120587e-06, "loss": 17.8066, "step": 13196 }, { "epoch": 0.24123055550478, "grad_norm": 7.718711203766756, "learning_rate": 8.875042240161781e-06, "loss": 17.9228, "step": 13197 }, { "epoch": 0.24124883470122654, "grad_norm": 7.243355889180906, "learning_rate": 8.87485516862129e-06, "loss": 17.8304, "step": 13198 }, { "epoch": 0.24126711389767305, "grad_norm": 7.233421254394535, "learning_rate": 8.874668083499767e-06, "loss": 17.9284, "step": 13199 }, { "epoch": 0.2412853930941196, "grad_norm": 7.8699390802999725, "learning_rate": 8.874480984797869e-06, "loss": 17.888, "step": 13200 }, { "epoch": 0.2413036722905661, "grad_norm": 7.071979508139762, "learning_rate": 8.87429387251625e-06, "loss": 17.6058, "step": 13201 }, { "epoch": 0.24132195148701263, "grad_norm": 6.285841114328047, "learning_rate": 8.874106746655569e-06, "loss": 17.1742, "step": 13202 }, { "epoch": 0.24134023068345917, "grad_norm": 6.578547586790232, "learning_rate": 8.873919607216478e-06, "loss": 17.5148, "step": 13203 }, { "epoch": 0.24135850987990567, "grad_norm": 5.450388431257817, "learning_rate": 8.873732454199638e-06, "loss": 17.0852, "step": 13204 }, { "epoch": 0.2413767890763522, "grad_norm": 7.313273339527103, "learning_rate": 8.873545287605701e-06, "loss": 18.157, "step": 13205 }, { "epoch": 0.24139506827279872, "grad_norm": 6.348646613613565, "learning_rate": 8.873358107435322e-06, "loss": 17.5325, "step": 13206 }, { "epoch": 0.24141334746924525, "grad_norm": 6.498333129874512, "learning_rate": 8.87317091368916e-06, "loss": 17.3581, "step": 13207 }, { "epoch": 0.2414316266656918, "grad_norm": 6.580875241650784, "learning_rate": 8.87298370636787e-06, "loss": 17.6637, "step": 13208 }, { "epoch": 0.2414499058621383, "grad_norm": 6.1743400167801035, "learning_rate": 8.872796485472109e-06, "loss": 17.4734, "step": 13209 }, { "epoch": 0.24146818505858483, "grad_norm": 6.9045866598215335, "learning_rate": 8.87260925100253e-06, "loss": 17.6767, "step": 13210 }, { "epoch": 0.24148646425503134, "grad_norm": 7.419330066661811, "learning_rate": 8.872422002959792e-06, "loss": 17.8789, "step": 13211 }, { "epoch": 0.24150474345147788, "grad_norm": 6.687812864967737, "learning_rate": 8.872234741344553e-06, "loss": 17.4704, "step": 13212 }, { "epoch": 0.24152302264792438, "grad_norm": 7.046758839553343, "learning_rate": 8.872047466157467e-06, "loss": 17.7868, "step": 13213 }, { "epoch": 0.24154130184437092, "grad_norm": 8.749597595690418, "learning_rate": 8.87186017739919e-06, "loss": 18.1431, "step": 13214 }, { "epoch": 0.24155958104081746, "grad_norm": 7.802112272264556, "learning_rate": 8.871672875070378e-06, "loss": 17.8983, "step": 13215 }, { "epoch": 0.24157786023726396, "grad_norm": 6.5238253261698205, "learning_rate": 8.87148555917169e-06, "loss": 17.4987, "step": 13216 }, { "epoch": 0.2415961394337105, "grad_norm": 8.130165120182328, "learning_rate": 8.87129822970378e-06, "loss": 18.2404, "step": 13217 }, { "epoch": 0.241614418630157, "grad_norm": 5.819613447220564, "learning_rate": 8.871110886667307e-06, "loss": 17.2527, "step": 13218 }, { "epoch": 0.24163269782660354, "grad_norm": 6.428250895224231, "learning_rate": 8.870923530062925e-06, "loss": 17.473, "step": 13219 }, { "epoch": 0.24165097702305008, "grad_norm": 9.147063504535623, "learning_rate": 8.870736159891294e-06, "loss": 18.2164, "step": 13220 }, { "epoch": 0.2416692562194966, "grad_norm": 6.396228529713454, "learning_rate": 8.870548776153066e-06, "loss": 17.3975, "step": 13221 }, { "epoch": 0.24168753541594312, "grad_norm": 5.741575381771392, "learning_rate": 8.870361378848902e-06, "loss": 17.2317, "step": 13222 }, { "epoch": 0.24170581461238963, "grad_norm": 6.002851179289545, "learning_rate": 8.870173967979457e-06, "loss": 17.4199, "step": 13223 }, { "epoch": 0.24172409380883617, "grad_norm": 6.375433046931361, "learning_rate": 8.869986543545386e-06, "loss": 17.4255, "step": 13224 }, { "epoch": 0.2417423730052827, "grad_norm": 6.964841275569278, "learning_rate": 8.869799105547349e-06, "loss": 17.7085, "step": 13225 }, { "epoch": 0.2417606522017292, "grad_norm": 6.189593330134313, "learning_rate": 8.869611653986004e-06, "loss": 17.3318, "step": 13226 }, { "epoch": 0.24177893139817574, "grad_norm": 6.558356271458293, "learning_rate": 8.869424188862005e-06, "loss": 17.6748, "step": 13227 }, { "epoch": 0.24179721059462225, "grad_norm": 7.597312531011783, "learning_rate": 8.869236710176011e-06, "loss": 18.17, "step": 13228 }, { "epoch": 0.2418154897910688, "grad_norm": 8.952541142696836, "learning_rate": 8.869049217928675e-06, "loss": 18.4476, "step": 13229 }, { "epoch": 0.2418337689875153, "grad_norm": 5.988686992748572, "learning_rate": 8.86886171212066e-06, "loss": 17.1655, "step": 13230 }, { "epoch": 0.24185204818396183, "grad_norm": 6.461742159027759, "learning_rate": 8.868674192752617e-06, "loss": 17.3513, "step": 13231 }, { "epoch": 0.24187032738040837, "grad_norm": 6.431905079479456, "learning_rate": 8.86848665982521e-06, "loss": 17.3626, "step": 13232 }, { "epoch": 0.24188860657685488, "grad_norm": 7.974793074328844, "learning_rate": 8.868299113339093e-06, "loss": 17.9988, "step": 13233 }, { "epoch": 0.2419068857733014, "grad_norm": 7.704360688764585, "learning_rate": 8.868111553294922e-06, "loss": 18.0645, "step": 13234 }, { "epoch": 0.24192516496974792, "grad_norm": 5.54608407310233, "learning_rate": 8.867923979693355e-06, "loss": 17.1499, "step": 13235 }, { "epoch": 0.24194344416619445, "grad_norm": 7.503894412720114, "learning_rate": 8.867736392535051e-06, "loss": 17.714, "step": 13236 }, { "epoch": 0.241961723362641, "grad_norm": 6.077081447168073, "learning_rate": 8.867548791820669e-06, "loss": 17.3986, "step": 13237 }, { "epoch": 0.2419800025590875, "grad_norm": 7.31575454844323, "learning_rate": 8.86736117755086e-06, "loss": 18.2215, "step": 13238 }, { "epoch": 0.24199828175553403, "grad_norm": 7.196479914345386, "learning_rate": 8.867173549726288e-06, "loss": 17.7561, "step": 13239 }, { "epoch": 0.24201656095198054, "grad_norm": 6.805207069244284, "learning_rate": 8.866985908347608e-06, "loss": 17.7114, "step": 13240 }, { "epoch": 0.24203484014842708, "grad_norm": 5.460241965285942, "learning_rate": 8.866798253415477e-06, "loss": 17.1375, "step": 13241 }, { "epoch": 0.2420531193448736, "grad_norm": 6.996102130263047, "learning_rate": 8.866610584930555e-06, "loss": 17.9595, "step": 13242 }, { "epoch": 0.24207139854132012, "grad_norm": 6.57622767178465, "learning_rate": 8.866422902893497e-06, "loss": 17.8044, "step": 13243 }, { "epoch": 0.24208967773776666, "grad_norm": 7.344064393129531, "learning_rate": 8.866235207304963e-06, "loss": 17.5162, "step": 13244 }, { "epoch": 0.24210795693421316, "grad_norm": 6.509075212063092, "learning_rate": 8.86604749816561e-06, "loss": 17.4359, "step": 13245 }, { "epoch": 0.2421262361306597, "grad_norm": 6.909143099713661, "learning_rate": 8.865859775476096e-06, "loss": 17.8215, "step": 13246 }, { "epoch": 0.2421445153271062, "grad_norm": 6.543960356519501, "learning_rate": 8.865672039237079e-06, "loss": 17.7186, "step": 13247 }, { "epoch": 0.24216279452355274, "grad_norm": 6.254846658838982, "learning_rate": 8.865484289449218e-06, "loss": 17.3325, "step": 13248 }, { "epoch": 0.24218107371999928, "grad_norm": 7.641910661522797, "learning_rate": 8.865296526113167e-06, "loss": 17.8863, "step": 13249 }, { "epoch": 0.2421993529164458, "grad_norm": 5.761708675709654, "learning_rate": 8.86510874922959e-06, "loss": 17.3231, "step": 13250 }, { "epoch": 0.24221763211289232, "grad_norm": 7.396955615086446, "learning_rate": 8.864920958799141e-06, "loss": 18.1548, "step": 13251 }, { "epoch": 0.24223591130933883, "grad_norm": 6.385589888431112, "learning_rate": 8.86473315482248e-06, "loss": 17.4871, "step": 13252 }, { "epoch": 0.24225419050578537, "grad_norm": 7.515137772539918, "learning_rate": 8.864545337300264e-06, "loss": 18.0275, "step": 13253 }, { "epoch": 0.2422724697022319, "grad_norm": 7.713963158824052, "learning_rate": 8.864357506233153e-06, "loss": 17.8809, "step": 13254 }, { "epoch": 0.2422907488986784, "grad_norm": 7.668300760106162, "learning_rate": 8.864169661621803e-06, "loss": 17.8319, "step": 13255 }, { "epoch": 0.24230902809512495, "grad_norm": 7.517969916661936, "learning_rate": 8.863981803466875e-06, "loss": 17.7307, "step": 13256 }, { "epoch": 0.24232730729157145, "grad_norm": 6.106088600959224, "learning_rate": 8.863793931769024e-06, "loss": 17.4672, "step": 13257 }, { "epoch": 0.242345586488018, "grad_norm": 8.690410270027261, "learning_rate": 8.863606046528911e-06, "loss": 17.8399, "step": 13258 }, { "epoch": 0.24236386568446452, "grad_norm": 5.9146146761704514, "learning_rate": 8.863418147747196e-06, "loss": 17.5012, "step": 13259 }, { "epoch": 0.24238214488091103, "grad_norm": 6.576900680122438, "learning_rate": 8.863230235424536e-06, "loss": 17.61, "step": 13260 }, { "epoch": 0.24240042407735757, "grad_norm": 6.039999456418752, "learning_rate": 8.863042309561587e-06, "loss": 17.3064, "step": 13261 }, { "epoch": 0.24241870327380408, "grad_norm": 6.466453689308442, "learning_rate": 8.86285437015901e-06, "loss": 17.4131, "step": 13262 }, { "epoch": 0.2424369824702506, "grad_norm": 8.318876179177755, "learning_rate": 8.862666417217465e-06, "loss": 18.016, "step": 13263 }, { "epoch": 0.24245526166669712, "grad_norm": 6.039915497893999, "learning_rate": 8.862478450737609e-06, "loss": 17.3117, "step": 13264 }, { "epoch": 0.24247354086314366, "grad_norm": 7.369563502714089, "learning_rate": 8.862290470720101e-06, "loss": 17.8927, "step": 13265 }, { "epoch": 0.2424918200595902, "grad_norm": 6.822151712253876, "learning_rate": 8.862102477165599e-06, "loss": 17.4322, "step": 13266 }, { "epoch": 0.2425100992560367, "grad_norm": 6.346122680891925, "learning_rate": 8.861914470074765e-06, "loss": 17.6086, "step": 13267 }, { "epoch": 0.24252837845248323, "grad_norm": 7.771126110246708, "learning_rate": 8.861726449448255e-06, "loss": 18.0334, "step": 13268 }, { "epoch": 0.24254665764892974, "grad_norm": 7.316897383884866, "learning_rate": 8.861538415286727e-06, "loss": 17.8483, "step": 13269 }, { "epoch": 0.24256493684537628, "grad_norm": 7.346397828902137, "learning_rate": 8.861350367590845e-06, "loss": 17.6931, "step": 13270 }, { "epoch": 0.2425832160418228, "grad_norm": 7.626975587403114, "learning_rate": 8.861162306361263e-06, "loss": 17.9159, "step": 13271 }, { "epoch": 0.24260149523826932, "grad_norm": 6.737947123073347, "learning_rate": 8.860974231598645e-06, "loss": 17.3233, "step": 13272 }, { "epoch": 0.24261977443471586, "grad_norm": 7.72127351728017, "learning_rate": 8.860786143303645e-06, "loss": 17.8567, "step": 13273 }, { "epoch": 0.24263805363116236, "grad_norm": 5.858346090633954, "learning_rate": 8.860598041476924e-06, "loss": 17.2809, "step": 13274 }, { "epoch": 0.2426563328276089, "grad_norm": 6.938778930396184, "learning_rate": 8.860409926119142e-06, "loss": 17.655, "step": 13275 }, { "epoch": 0.24267461202405544, "grad_norm": 7.698228884170294, "learning_rate": 8.86022179723096e-06, "loss": 17.669, "step": 13276 }, { "epoch": 0.24269289122050194, "grad_norm": 5.775058615370471, "learning_rate": 8.860033654813033e-06, "loss": 17.0366, "step": 13277 }, { "epoch": 0.24271117041694848, "grad_norm": 6.537461079637749, "learning_rate": 8.859845498866027e-06, "loss": 17.527, "step": 13278 }, { "epoch": 0.242729449613395, "grad_norm": 5.8875767795931475, "learning_rate": 8.859657329390595e-06, "loss": 17.2091, "step": 13279 }, { "epoch": 0.24274772880984152, "grad_norm": 8.352655247672722, "learning_rate": 8.859469146387399e-06, "loss": 18.5289, "step": 13280 }, { "epoch": 0.24276600800628803, "grad_norm": 7.035080451229858, "learning_rate": 8.859280949857098e-06, "loss": 17.9262, "step": 13281 }, { "epoch": 0.24278428720273457, "grad_norm": 7.949003052893934, "learning_rate": 8.859092739800353e-06, "loss": 17.8076, "step": 13282 }, { "epoch": 0.2428025663991811, "grad_norm": 6.352949564202136, "learning_rate": 8.858904516217821e-06, "loss": 17.4697, "step": 13283 }, { "epoch": 0.2428208455956276, "grad_norm": 6.412127243721164, "learning_rate": 8.858716279110166e-06, "loss": 17.2088, "step": 13284 }, { "epoch": 0.24283912479207415, "grad_norm": 7.182949874929911, "learning_rate": 8.858528028478044e-06, "loss": 17.8308, "step": 13285 }, { "epoch": 0.24285740398852065, "grad_norm": 6.6943952253064865, "learning_rate": 8.858339764322118e-06, "loss": 17.285, "step": 13286 }, { "epoch": 0.2428756831849672, "grad_norm": 7.803722863692137, "learning_rate": 8.858151486643043e-06, "loss": 18.2402, "step": 13287 }, { "epoch": 0.24289396238141372, "grad_norm": 5.810079677159859, "learning_rate": 8.857963195441483e-06, "loss": 17.1884, "step": 13288 }, { "epoch": 0.24291224157786023, "grad_norm": 8.499056624784934, "learning_rate": 8.857774890718098e-06, "loss": 18.0358, "step": 13289 }, { "epoch": 0.24293052077430677, "grad_norm": 5.607226767592612, "learning_rate": 8.857586572473544e-06, "loss": 16.9867, "step": 13290 }, { "epoch": 0.24294879997075328, "grad_norm": 7.082798162482926, "learning_rate": 8.857398240708487e-06, "loss": 17.9252, "step": 13291 }, { "epoch": 0.2429670791671998, "grad_norm": 6.928844891573724, "learning_rate": 8.857209895423582e-06, "loss": 17.8962, "step": 13292 }, { "epoch": 0.24298535836364635, "grad_norm": 6.79816833825027, "learning_rate": 8.857021536619493e-06, "loss": 17.7566, "step": 13293 }, { "epoch": 0.24300363756009286, "grad_norm": 8.708056121991014, "learning_rate": 8.856833164296877e-06, "loss": 17.7842, "step": 13294 }, { "epoch": 0.2430219167565394, "grad_norm": 6.249069389945501, "learning_rate": 8.856644778456394e-06, "loss": 17.4322, "step": 13295 }, { "epoch": 0.2430401959529859, "grad_norm": 5.2644819088888015, "learning_rate": 8.856456379098707e-06, "loss": 16.7531, "step": 13296 }, { "epoch": 0.24305847514943243, "grad_norm": 6.690942598412263, "learning_rate": 8.856267966224474e-06, "loss": 17.544, "step": 13297 }, { "epoch": 0.24307675434587894, "grad_norm": 8.679717368008, "learning_rate": 8.856079539834357e-06, "loss": 18.7665, "step": 13298 }, { "epoch": 0.24309503354232548, "grad_norm": 6.965027228531672, "learning_rate": 8.855891099929017e-06, "loss": 17.8125, "step": 13299 }, { "epoch": 0.243113312738772, "grad_norm": 6.9312897841704615, "learning_rate": 8.855702646509113e-06, "loss": 17.747, "step": 13300 }, { "epoch": 0.24313159193521852, "grad_norm": 7.540184567771135, "learning_rate": 8.855514179575305e-06, "loss": 18.2535, "step": 13301 }, { "epoch": 0.24314987113166506, "grad_norm": 6.7747784609436135, "learning_rate": 8.855325699128255e-06, "loss": 17.7905, "step": 13302 }, { "epoch": 0.24316815032811157, "grad_norm": 6.1467852029172985, "learning_rate": 8.855137205168623e-06, "loss": 17.6317, "step": 13303 }, { "epoch": 0.2431864295245581, "grad_norm": 6.450889722139722, "learning_rate": 8.854948697697068e-06, "loss": 17.5033, "step": 13304 }, { "epoch": 0.24320470872100464, "grad_norm": 6.690876513263371, "learning_rate": 8.854760176714254e-06, "loss": 17.7083, "step": 13305 }, { "epoch": 0.24322298791745114, "grad_norm": 7.881204537273559, "learning_rate": 8.854571642220839e-06, "loss": 17.9876, "step": 13306 }, { "epoch": 0.24324126711389768, "grad_norm": 6.481490924340865, "learning_rate": 8.854383094217485e-06, "loss": 17.5346, "step": 13307 }, { "epoch": 0.2432595463103442, "grad_norm": 6.515330290429976, "learning_rate": 8.854194532704854e-06, "loss": 17.0989, "step": 13308 }, { "epoch": 0.24327782550679072, "grad_norm": 6.822772907358152, "learning_rate": 8.854005957683604e-06, "loss": 17.7118, "step": 13309 }, { "epoch": 0.24329610470323726, "grad_norm": 7.0022039778280885, "learning_rate": 8.8538173691544e-06, "loss": 17.6289, "step": 13310 }, { "epoch": 0.24331438389968377, "grad_norm": 6.884454555826788, "learning_rate": 8.853628767117899e-06, "loss": 17.6373, "step": 13311 }, { "epoch": 0.2433326630961303, "grad_norm": 6.250198007933299, "learning_rate": 8.853440151574762e-06, "loss": 17.3452, "step": 13312 }, { "epoch": 0.2433509422925768, "grad_norm": 7.231128093485779, "learning_rate": 8.853251522525655e-06, "loss": 17.971, "step": 13313 }, { "epoch": 0.24336922148902335, "grad_norm": 7.839890967614947, "learning_rate": 8.853062879971232e-06, "loss": 18.1151, "step": 13314 }, { "epoch": 0.24338750068546985, "grad_norm": 6.908170487681074, "learning_rate": 8.85287422391216e-06, "loss": 17.5089, "step": 13315 }, { "epoch": 0.2434057798819164, "grad_norm": 7.084431505572852, "learning_rate": 8.8526855543491e-06, "loss": 17.7467, "step": 13316 }, { "epoch": 0.24342405907836293, "grad_norm": 6.4107654527837195, "learning_rate": 8.852496871282707e-06, "loss": 17.8434, "step": 13317 }, { "epoch": 0.24344233827480943, "grad_norm": 6.668920080219149, "learning_rate": 8.85230817471365e-06, "loss": 17.7183, "step": 13318 }, { "epoch": 0.24346061747125597, "grad_norm": 6.298989301834971, "learning_rate": 8.852119464642586e-06, "loss": 17.5342, "step": 13319 }, { "epoch": 0.24347889666770248, "grad_norm": 5.739422634792878, "learning_rate": 8.851930741070179e-06, "loss": 17.2153, "step": 13320 }, { "epoch": 0.243497175864149, "grad_norm": 5.214420217905649, "learning_rate": 8.851742003997088e-06, "loss": 17.0749, "step": 13321 }, { "epoch": 0.24351545506059555, "grad_norm": 8.661960000088818, "learning_rate": 8.851553253423974e-06, "loss": 17.9828, "step": 13322 }, { "epoch": 0.24353373425704206, "grad_norm": 6.674174401769316, "learning_rate": 8.851364489351504e-06, "loss": 17.4714, "step": 13323 }, { "epoch": 0.2435520134534886, "grad_norm": 6.187393832054604, "learning_rate": 8.851175711780331e-06, "loss": 17.2636, "step": 13324 }, { "epoch": 0.2435702926499351, "grad_norm": 6.536215441237082, "learning_rate": 8.850986920711124e-06, "loss": 17.3158, "step": 13325 }, { "epoch": 0.24358857184638164, "grad_norm": 6.333101863390559, "learning_rate": 8.850798116144542e-06, "loss": 17.4345, "step": 13326 }, { "epoch": 0.24360685104282817, "grad_norm": 7.629197284006749, "learning_rate": 8.850609298081247e-06, "loss": 18.2452, "step": 13327 }, { "epoch": 0.24362513023927468, "grad_norm": 7.707916341218809, "learning_rate": 8.8504204665219e-06, "loss": 18.0921, "step": 13328 }, { "epoch": 0.24364340943572121, "grad_norm": 6.931500141022469, "learning_rate": 8.850231621467162e-06, "loss": 17.552, "step": 13329 }, { "epoch": 0.24366168863216772, "grad_norm": 6.7577127924474265, "learning_rate": 8.850042762917698e-06, "loss": 17.6586, "step": 13330 }, { "epoch": 0.24367996782861426, "grad_norm": 6.71347104740245, "learning_rate": 8.849853890874168e-06, "loss": 17.7506, "step": 13331 }, { "epoch": 0.24369824702506077, "grad_norm": 6.60361960143841, "learning_rate": 8.849665005337234e-06, "loss": 17.6244, "step": 13332 }, { "epoch": 0.2437165262215073, "grad_norm": 6.404002992694197, "learning_rate": 8.849476106307558e-06, "loss": 17.5198, "step": 13333 }, { "epoch": 0.24373480541795384, "grad_norm": 6.740798468982144, "learning_rate": 8.849287193785803e-06, "loss": 17.7947, "step": 13334 }, { "epoch": 0.24375308461440034, "grad_norm": 7.478272733324508, "learning_rate": 8.84909826777263e-06, "loss": 17.9603, "step": 13335 }, { "epoch": 0.24377136381084688, "grad_norm": 5.8534147617358165, "learning_rate": 8.848909328268702e-06, "loss": 17.2796, "step": 13336 }, { "epoch": 0.2437896430072934, "grad_norm": 6.2500079619499305, "learning_rate": 8.84872037527468e-06, "loss": 17.4809, "step": 13337 }, { "epoch": 0.24380792220373992, "grad_norm": 7.060013727147254, "learning_rate": 8.848531408791226e-06, "loss": 17.8596, "step": 13338 }, { "epoch": 0.24382620140018646, "grad_norm": 6.129256935432026, "learning_rate": 8.848342428819006e-06, "loss": 17.5985, "step": 13339 }, { "epoch": 0.24384448059663297, "grad_norm": 7.852104461299691, "learning_rate": 8.848153435358678e-06, "loss": 17.7454, "step": 13340 }, { "epoch": 0.2438627597930795, "grad_norm": 7.179842200790302, "learning_rate": 8.847964428410907e-06, "loss": 17.709, "step": 13341 }, { "epoch": 0.243881038989526, "grad_norm": 7.099405078928812, "learning_rate": 8.847775407976353e-06, "loss": 17.9011, "step": 13342 }, { "epoch": 0.24389931818597255, "grad_norm": 6.579104355020609, "learning_rate": 8.84758637405568e-06, "loss": 17.4511, "step": 13343 }, { "epoch": 0.24391759738241908, "grad_norm": 6.361251746274071, "learning_rate": 8.847397326649553e-06, "loss": 17.3535, "step": 13344 }, { "epoch": 0.2439358765788656, "grad_norm": 6.134572598632472, "learning_rate": 8.847208265758633e-06, "loss": 17.4643, "step": 13345 }, { "epoch": 0.24395415577531213, "grad_norm": 7.10404281793083, "learning_rate": 8.84701919138358e-06, "loss": 17.8232, "step": 13346 }, { "epoch": 0.24397243497175863, "grad_norm": 6.672163379254932, "learning_rate": 8.846830103525056e-06, "loss": 17.4976, "step": 13347 }, { "epoch": 0.24399071416820517, "grad_norm": 7.540641617800829, "learning_rate": 8.84664100218373e-06, "loss": 17.9285, "step": 13348 }, { "epoch": 0.24400899336465168, "grad_norm": 8.027690292367463, "learning_rate": 8.84645188736026e-06, "loss": 18.2729, "step": 13349 }, { "epoch": 0.2440272725610982, "grad_norm": 7.01108329933926, "learning_rate": 8.846262759055311e-06, "loss": 17.7341, "step": 13350 }, { "epoch": 0.24404555175754475, "grad_norm": 7.618609732374895, "learning_rate": 8.846073617269542e-06, "loss": 17.7382, "step": 13351 }, { "epoch": 0.24406383095399126, "grad_norm": 7.669566130584318, "learning_rate": 8.84588446200362e-06, "loss": 17.8425, "step": 13352 }, { "epoch": 0.2440821101504378, "grad_norm": 7.6072715587573345, "learning_rate": 8.845695293258207e-06, "loss": 18.1951, "step": 13353 }, { "epoch": 0.2441003893468843, "grad_norm": 6.899448402243731, "learning_rate": 8.845506111033966e-06, "loss": 17.5865, "step": 13354 }, { "epoch": 0.24411866854333084, "grad_norm": 6.901240507089418, "learning_rate": 8.84531691533156e-06, "loss": 17.8634, "step": 13355 }, { "epoch": 0.24413694773977737, "grad_norm": 8.183313145335115, "learning_rate": 8.84512770615165e-06, "loss": 16.903, "step": 13356 }, { "epoch": 0.24415522693622388, "grad_norm": 7.5261891727210095, "learning_rate": 8.844938483494905e-06, "loss": 17.9907, "step": 13357 }, { "epoch": 0.24417350613267041, "grad_norm": 7.149270791029402, "learning_rate": 8.844749247361982e-06, "loss": 17.6361, "step": 13358 }, { "epoch": 0.24419178532911692, "grad_norm": 10.214463600002174, "learning_rate": 8.844559997753546e-06, "loss": 18.5494, "step": 13359 }, { "epoch": 0.24421006452556346, "grad_norm": 7.225317121006948, "learning_rate": 8.84437073467026e-06, "loss": 17.7026, "step": 13360 }, { "epoch": 0.24422834372201, "grad_norm": 7.632819658495426, "learning_rate": 8.844181458112791e-06, "loss": 17.9165, "step": 13361 }, { "epoch": 0.2442466229184565, "grad_norm": 7.972366299924804, "learning_rate": 8.843992168081796e-06, "loss": 18.0311, "step": 13362 }, { "epoch": 0.24426490211490304, "grad_norm": 6.2756417482546905, "learning_rate": 8.843802864577944e-06, "loss": 17.5389, "step": 13363 }, { "epoch": 0.24428318131134955, "grad_norm": 8.193139934904673, "learning_rate": 8.843613547601896e-06, "loss": 18.3795, "step": 13364 }, { "epoch": 0.24430146050779608, "grad_norm": 6.389396940466, "learning_rate": 8.843424217154318e-06, "loss": 17.7544, "step": 13365 }, { "epoch": 0.2443197397042426, "grad_norm": 7.160598777970506, "learning_rate": 8.843234873235869e-06, "loss": 17.7244, "step": 13366 }, { "epoch": 0.24433801890068912, "grad_norm": 6.64930968727387, "learning_rate": 8.843045515847217e-06, "loss": 17.5518, "step": 13367 }, { "epoch": 0.24435629809713566, "grad_norm": 5.671549508002502, "learning_rate": 8.842856144989023e-06, "loss": 17.1291, "step": 13368 }, { "epoch": 0.24437457729358217, "grad_norm": 7.902320979850315, "learning_rate": 8.842666760661951e-06, "loss": 17.6099, "step": 13369 }, { "epoch": 0.2443928564900287, "grad_norm": 5.965673149399645, "learning_rate": 8.842477362866667e-06, "loss": 17.2664, "step": 13370 }, { "epoch": 0.2444111356864752, "grad_norm": 5.644593567042512, "learning_rate": 8.842287951603833e-06, "loss": 17.3299, "step": 13371 }, { "epoch": 0.24442941488292175, "grad_norm": 6.662397886446397, "learning_rate": 8.842098526874113e-06, "loss": 17.4437, "step": 13372 }, { "epoch": 0.24444769407936828, "grad_norm": 7.231356628187275, "learning_rate": 8.841909088678172e-06, "loss": 17.954, "step": 13373 }, { "epoch": 0.2444659732758148, "grad_norm": 6.770349758066719, "learning_rate": 8.84171963701667e-06, "loss": 17.6923, "step": 13374 }, { "epoch": 0.24448425247226133, "grad_norm": 7.465032746595875, "learning_rate": 8.841530171890275e-06, "loss": 18.0996, "step": 13375 }, { "epoch": 0.24450253166870783, "grad_norm": 6.4393473099515575, "learning_rate": 8.841340693299653e-06, "loss": 17.4858, "step": 13376 }, { "epoch": 0.24452081086515437, "grad_norm": 7.163912942037377, "learning_rate": 8.841151201245462e-06, "loss": 17.8614, "step": 13377 }, { "epoch": 0.2445390900616009, "grad_norm": 7.405374688845051, "learning_rate": 8.840961695728372e-06, "loss": 17.8952, "step": 13378 }, { "epoch": 0.2445573692580474, "grad_norm": 5.637338579282636, "learning_rate": 8.840772176749042e-06, "loss": 17.0259, "step": 13379 }, { "epoch": 0.24457564845449395, "grad_norm": 6.369503905085804, "learning_rate": 8.84058264430814e-06, "loss": 17.3215, "step": 13380 }, { "epoch": 0.24459392765094046, "grad_norm": 5.335592655839366, "learning_rate": 8.84039309840633e-06, "loss": 17.1554, "step": 13381 }, { "epoch": 0.244612206847387, "grad_norm": 6.716870205319661, "learning_rate": 8.840203539044273e-06, "loss": 17.7323, "step": 13382 }, { "epoch": 0.2446304860438335, "grad_norm": 7.826668518376646, "learning_rate": 8.840013966222638e-06, "loss": 17.7759, "step": 13383 }, { "epoch": 0.24464876524028004, "grad_norm": 6.812892731960148, "learning_rate": 8.839824379942089e-06, "loss": 17.7177, "step": 13384 }, { "epoch": 0.24466704443672657, "grad_norm": 5.63299034570083, "learning_rate": 8.839634780203285e-06, "loss": 17.0862, "step": 13385 }, { "epoch": 0.24468532363317308, "grad_norm": 7.337248803763474, "learning_rate": 8.839445167006894e-06, "loss": 18.2525, "step": 13386 }, { "epoch": 0.24470360282961962, "grad_norm": 6.918295093298403, "learning_rate": 8.839255540353583e-06, "loss": 17.6056, "step": 13387 }, { "epoch": 0.24472188202606612, "grad_norm": 7.020567883012543, "learning_rate": 8.839065900244015e-06, "loss": 18.1106, "step": 13388 }, { "epoch": 0.24474016122251266, "grad_norm": 6.150241307155845, "learning_rate": 8.838876246678854e-06, "loss": 17.4877, "step": 13389 }, { "epoch": 0.2447584404189592, "grad_norm": 7.208694299041957, "learning_rate": 8.838686579658763e-06, "loss": 18.076, "step": 13390 }, { "epoch": 0.2447767196154057, "grad_norm": 5.86685802657085, "learning_rate": 8.83849689918441e-06, "loss": 17.3161, "step": 13391 }, { "epoch": 0.24479499881185224, "grad_norm": 7.405617494174632, "learning_rate": 8.838307205256458e-06, "loss": 17.7358, "step": 13392 }, { "epoch": 0.24481327800829875, "grad_norm": 8.410389976756395, "learning_rate": 8.838117497875572e-06, "loss": 18.2559, "step": 13393 }, { "epoch": 0.24483155720474528, "grad_norm": 7.882687619487962, "learning_rate": 8.83792777704242e-06, "loss": 17.7044, "step": 13394 }, { "epoch": 0.24484983640119182, "grad_norm": 5.986235546857629, "learning_rate": 8.83773804275766e-06, "loss": 17.1556, "step": 13395 }, { "epoch": 0.24486811559763833, "grad_norm": 6.177255456277056, "learning_rate": 8.837548295021963e-06, "loss": 17.2895, "step": 13396 }, { "epoch": 0.24488639479408486, "grad_norm": 6.904485474398583, "learning_rate": 8.837358533835992e-06, "loss": 17.6568, "step": 13397 }, { "epoch": 0.24490467399053137, "grad_norm": 7.28159790149412, "learning_rate": 8.837168759200413e-06, "loss": 18.0456, "step": 13398 }, { "epoch": 0.2449229531869779, "grad_norm": 7.663327211935231, "learning_rate": 8.83697897111589e-06, "loss": 18.1913, "step": 13399 }, { "epoch": 0.2449412323834244, "grad_norm": 8.820204283182152, "learning_rate": 8.836789169583089e-06, "loss": 18.0061, "step": 13400 }, { "epoch": 0.24495951157987095, "grad_norm": 8.317022410557518, "learning_rate": 8.836599354602674e-06, "loss": 17.9331, "step": 13401 }, { "epoch": 0.24497779077631748, "grad_norm": 6.1434665356788205, "learning_rate": 8.836409526175314e-06, "loss": 17.0856, "step": 13402 }, { "epoch": 0.244996069972764, "grad_norm": 6.046074046650978, "learning_rate": 8.836219684301667e-06, "loss": 17.3953, "step": 13403 }, { "epoch": 0.24501434916921053, "grad_norm": 6.058140723586201, "learning_rate": 8.836029828982407e-06, "loss": 17.609, "step": 13404 }, { "epoch": 0.24503262836565703, "grad_norm": 7.067027057229567, "learning_rate": 8.835839960218193e-06, "loss": 17.7556, "step": 13405 }, { "epoch": 0.24505090756210357, "grad_norm": 7.224133713204975, "learning_rate": 8.835650078009694e-06, "loss": 17.7724, "step": 13406 }, { "epoch": 0.2450691867585501, "grad_norm": 5.508434191393395, "learning_rate": 8.835460182357573e-06, "loss": 17.1609, "step": 13407 }, { "epoch": 0.24508746595499661, "grad_norm": 6.611146244016963, "learning_rate": 8.835270273262498e-06, "loss": 17.6224, "step": 13408 }, { "epoch": 0.24510574515144315, "grad_norm": 6.309339124168227, "learning_rate": 8.835080350725133e-06, "loss": 17.4649, "step": 13409 }, { "epoch": 0.24512402434788966, "grad_norm": 7.618887437895719, "learning_rate": 8.834890414746144e-06, "loss": 17.9619, "step": 13410 }, { "epoch": 0.2451423035443362, "grad_norm": 5.807315268732273, "learning_rate": 8.834700465326198e-06, "loss": 17.3501, "step": 13411 }, { "epoch": 0.24516058274078273, "grad_norm": 5.200649025143558, "learning_rate": 8.834510502465959e-06, "loss": 17.21, "step": 13412 }, { "epoch": 0.24517886193722924, "grad_norm": 7.39514446397721, "learning_rate": 8.834320526166092e-06, "loss": 17.6912, "step": 13413 }, { "epoch": 0.24519714113367577, "grad_norm": 7.080208674680301, "learning_rate": 8.834130536427266e-06, "loss": 17.8667, "step": 13414 }, { "epoch": 0.24521542033012228, "grad_norm": 6.669847957111236, "learning_rate": 8.833940533250146e-06, "loss": 17.5223, "step": 13415 }, { "epoch": 0.24523369952656882, "grad_norm": 6.999556313077716, "learning_rate": 8.833750516635395e-06, "loss": 17.6839, "step": 13416 }, { "epoch": 0.24525197872301532, "grad_norm": 7.493540137187102, "learning_rate": 8.83356048658368e-06, "loss": 17.5081, "step": 13417 }, { "epoch": 0.24527025791946186, "grad_norm": 5.980800051120309, "learning_rate": 8.833370443095671e-06, "loss": 17.5579, "step": 13418 }, { "epoch": 0.2452885371159084, "grad_norm": 6.675804185575348, "learning_rate": 8.833180386172032e-06, "loss": 17.6918, "step": 13419 }, { "epoch": 0.2453068163123549, "grad_norm": 5.669189846765466, "learning_rate": 8.832990315813425e-06, "loss": 16.9523, "step": 13420 }, { "epoch": 0.24532509550880144, "grad_norm": 8.225106534681808, "learning_rate": 8.832800232020521e-06, "loss": 18.0924, "step": 13421 }, { "epoch": 0.24534337470524795, "grad_norm": 6.543938118391621, "learning_rate": 8.832610134793985e-06, "loss": 17.3952, "step": 13422 }, { "epoch": 0.24536165390169448, "grad_norm": 6.19665585703796, "learning_rate": 8.832420024134484e-06, "loss": 17.3581, "step": 13423 }, { "epoch": 0.24537993309814102, "grad_norm": 7.108267611247485, "learning_rate": 8.83222990004268e-06, "loss": 17.6781, "step": 13424 }, { "epoch": 0.24539821229458753, "grad_norm": 7.422004670472646, "learning_rate": 8.832039762519247e-06, "loss": 18.1953, "step": 13425 }, { "epoch": 0.24541649149103406, "grad_norm": 6.954008379135574, "learning_rate": 8.831849611564845e-06, "loss": 17.3529, "step": 13426 }, { "epoch": 0.24543477068748057, "grad_norm": 5.8034365000088215, "learning_rate": 8.831659447180143e-06, "loss": 17.3271, "step": 13427 }, { "epoch": 0.2454530498839271, "grad_norm": 7.666960779097515, "learning_rate": 8.831469269365808e-06, "loss": 18.2337, "step": 13428 }, { "epoch": 0.24547132908037364, "grad_norm": 7.806620772604487, "learning_rate": 8.831279078122505e-06, "loss": 18.0384, "step": 13429 }, { "epoch": 0.24548960827682015, "grad_norm": 5.910725999860661, "learning_rate": 8.831088873450902e-06, "loss": 17.5923, "step": 13430 }, { "epoch": 0.24550788747326668, "grad_norm": 6.647740532577579, "learning_rate": 8.830898655351663e-06, "loss": 17.7018, "step": 13431 }, { "epoch": 0.2455261666697132, "grad_norm": 6.381486614235224, "learning_rate": 8.830708423825458e-06, "loss": 17.5859, "step": 13432 }, { "epoch": 0.24554444586615973, "grad_norm": 6.394719458885018, "learning_rate": 8.83051817887295e-06, "loss": 17.3302, "step": 13433 }, { "epoch": 0.24556272506260624, "grad_norm": 6.066904758483784, "learning_rate": 8.830327920494812e-06, "loss": 17.4341, "step": 13434 }, { "epoch": 0.24558100425905277, "grad_norm": 7.070178994718958, "learning_rate": 8.830137648691705e-06, "loss": 17.9941, "step": 13435 }, { "epoch": 0.2455992834554993, "grad_norm": 6.085682839386033, "learning_rate": 8.8299473634643e-06, "loss": 17.2157, "step": 13436 }, { "epoch": 0.24561756265194581, "grad_norm": 6.91618577276595, "learning_rate": 8.829757064813257e-06, "loss": 17.8577, "step": 13437 }, { "epoch": 0.24563584184839235, "grad_norm": 7.0115887406466975, "learning_rate": 8.829566752739252e-06, "loss": 17.8506, "step": 13438 }, { "epoch": 0.24565412104483886, "grad_norm": 6.654487020029335, "learning_rate": 8.829376427242948e-06, "loss": 17.6821, "step": 13439 }, { "epoch": 0.2456724002412854, "grad_norm": 7.002473609128466, "learning_rate": 8.82918608832501e-06, "loss": 17.5262, "step": 13440 }, { "epoch": 0.24569067943773193, "grad_norm": 6.400880240180771, "learning_rate": 8.828995735986107e-06, "loss": 17.5627, "step": 13441 }, { "epoch": 0.24570895863417844, "grad_norm": 7.434675361462428, "learning_rate": 8.828805370226906e-06, "loss": 18.0045, "step": 13442 }, { "epoch": 0.24572723783062497, "grad_norm": 7.331143894515387, "learning_rate": 8.828614991048076e-06, "loss": 17.7971, "step": 13443 }, { "epoch": 0.24574551702707148, "grad_norm": 7.619034032513552, "learning_rate": 8.828424598450282e-06, "loss": 18.0238, "step": 13444 }, { "epoch": 0.24576379622351802, "grad_norm": 7.39259731663535, "learning_rate": 8.828234192434192e-06, "loss": 17.5899, "step": 13445 }, { "epoch": 0.24578207541996455, "grad_norm": 6.776273380382308, "learning_rate": 8.828043773000474e-06, "loss": 17.765, "step": 13446 }, { "epoch": 0.24580035461641106, "grad_norm": 6.252730432829984, "learning_rate": 8.827853340149794e-06, "loss": 17.5249, "step": 13447 }, { "epoch": 0.2458186338128576, "grad_norm": 7.435038084033365, "learning_rate": 8.82766289388282e-06, "loss": 17.8393, "step": 13448 }, { "epoch": 0.2458369130093041, "grad_norm": 7.802496312818711, "learning_rate": 8.827472434200219e-06, "loss": 17.8409, "step": 13449 }, { "epoch": 0.24585519220575064, "grad_norm": 7.202308153481494, "learning_rate": 8.82728196110266e-06, "loss": 18.0122, "step": 13450 }, { "epoch": 0.24587347140219715, "grad_norm": 6.924654009065137, "learning_rate": 8.827091474590811e-06, "loss": 17.8219, "step": 13451 }, { "epoch": 0.24589175059864368, "grad_norm": 7.404016704344671, "learning_rate": 8.826900974665337e-06, "loss": 17.8862, "step": 13452 }, { "epoch": 0.24591002979509022, "grad_norm": 6.6462088088911875, "learning_rate": 8.826710461326908e-06, "loss": 17.6568, "step": 13453 }, { "epoch": 0.24592830899153673, "grad_norm": 7.39997599394969, "learning_rate": 8.826519934576192e-06, "loss": 17.5758, "step": 13454 }, { "epoch": 0.24594658818798326, "grad_norm": 6.15160971977275, "learning_rate": 8.826329394413855e-06, "loss": 17.3852, "step": 13455 }, { "epoch": 0.24596486738442977, "grad_norm": 7.913428805973735, "learning_rate": 8.826138840840563e-06, "loss": 17.6378, "step": 13456 }, { "epoch": 0.2459831465808763, "grad_norm": 8.847662828382003, "learning_rate": 8.825948273856988e-06, "loss": 18.1707, "step": 13457 }, { "epoch": 0.24600142577732284, "grad_norm": 5.690044873405773, "learning_rate": 8.825757693463797e-06, "loss": 17.1624, "step": 13458 }, { "epoch": 0.24601970497376935, "grad_norm": 7.173770483159854, "learning_rate": 8.825567099661656e-06, "loss": 17.9517, "step": 13459 }, { "epoch": 0.24603798417021588, "grad_norm": 5.671500016403455, "learning_rate": 8.825376492451237e-06, "loss": 17.2507, "step": 13460 }, { "epoch": 0.2460562633666624, "grad_norm": 5.760964048052998, "learning_rate": 8.825185871833203e-06, "loss": 17.3155, "step": 13461 }, { "epoch": 0.24607454256310893, "grad_norm": 6.8402574657905255, "learning_rate": 8.824995237808224e-06, "loss": 17.7137, "step": 13462 }, { "epoch": 0.24609282175955546, "grad_norm": 6.924451096801156, "learning_rate": 8.82480459037697e-06, "loss": 17.8487, "step": 13463 }, { "epoch": 0.24611110095600197, "grad_norm": 6.399619906996067, "learning_rate": 8.824613929540107e-06, "loss": 17.5705, "step": 13464 }, { "epoch": 0.2461293801524485, "grad_norm": 5.980177457081154, "learning_rate": 8.824423255298305e-06, "loss": 17.3176, "step": 13465 }, { "epoch": 0.24614765934889501, "grad_norm": 6.200361250393343, "learning_rate": 8.824232567652232e-06, "loss": 17.407, "step": 13466 }, { "epoch": 0.24616593854534155, "grad_norm": 7.278235109275367, "learning_rate": 8.824041866602554e-06, "loss": 17.62, "step": 13467 }, { "epoch": 0.24618421774178806, "grad_norm": 6.757109365456597, "learning_rate": 8.823851152149941e-06, "loss": 18.0304, "step": 13468 }, { "epoch": 0.2462024969382346, "grad_norm": 6.254819297715207, "learning_rate": 8.823660424295062e-06, "loss": 17.5373, "step": 13469 }, { "epoch": 0.24622077613468113, "grad_norm": 6.996033752640962, "learning_rate": 8.823469683038583e-06, "loss": 17.8082, "step": 13470 }, { "epoch": 0.24623905533112764, "grad_norm": 6.9903218827039915, "learning_rate": 8.823278928381179e-06, "loss": 17.9962, "step": 13471 }, { "epoch": 0.24625733452757417, "grad_norm": 6.974909060118697, "learning_rate": 8.82308816032351e-06, "loss": 17.6751, "step": 13472 }, { "epoch": 0.24627561372402068, "grad_norm": 7.311919385715046, "learning_rate": 8.822897378866252e-06, "loss": 17.8247, "step": 13473 }, { "epoch": 0.24629389292046722, "grad_norm": 8.234798677547689, "learning_rate": 8.822706584010068e-06, "loss": 18.0569, "step": 13474 }, { "epoch": 0.24631217211691375, "grad_norm": 7.492414245740768, "learning_rate": 8.82251577575563e-06, "loss": 17.7471, "step": 13475 }, { "epoch": 0.24633045131336026, "grad_norm": 6.416032807014928, "learning_rate": 8.822324954103606e-06, "loss": 17.2773, "step": 13476 }, { "epoch": 0.2463487305098068, "grad_norm": 6.605216545214185, "learning_rate": 8.822134119054665e-06, "loss": 17.3079, "step": 13477 }, { "epoch": 0.2463670097062533, "grad_norm": 7.473347962662394, "learning_rate": 8.821943270609475e-06, "loss": 18.0302, "step": 13478 }, { "epoch": 0.24638528890269984, "grad_norm": 6.399162085666547, "learning_rate": 8.821752408768706e-06, "loss": 17.5172, "step": 13479 }, { "epoch": 0.24640356809914638, "grad_norm": 7.468345587531287, "learning_rate": 8.821561533533026e-06, "loss": 17.6476, "step": 13480 }, { "epoch": 0.24642184729559288, "grad_norm": 7.025834326555146, "learning_rate": 8.821370644903105e-06, "loss": 17.8186, "step": 13481 }, { "epoch": 0.24644012649203942, "grad_norm": 6.223068864164905, "learning_rate": 8.821179742879611e-06, "loss": 17.3983, "step": 13482 }, { "epoch": 0.24645840568848593, "grad_norm": 7.284991771763925, "learning_rate": 8.820988827463213e-06, "loss": 17.8984, "step": 13483 }, { "epoch": 0.24647668488493246, "grad_norm": 5.936303452699871, "learning_rate": 8.820797898654581e-06, "loss": 17.1977, "step": 13484 }, { "epoch": 0.24649496408137897, "grad_norm": 5.221180502955405, "learning_rate": 8.820606956454386e-06, "loss": 16.9444, "step": 13485 }, { "epoch": 0.2465132432778255, "grad_norm": 6.811217939005427, "learning_rate": 8.820416000863292e-06, "loss": 17.898, "step": 13486 }, { "epoch": 0.24653152247427204, "grad_norm": 8.177478662926852, "learning_rate": 8.820225031881974e-06, "loss": 18.3779, "step": 13487 }, { "epoch": 0.24654980167071855, "grad_norm": 7.4405321701852145, "learning_rate": 8.820034049511097e-06, "loss": 18.1126, "step": 13488 }, { "epoch": 0.24656808086716508, "grad_norm": 6.192990290104176, "learning_rate": 8.819843053751334e-06, "loss": 17.7177, "step": 13489 }, { "epoch": 0.2465863600636116, "grad_norm": 6.153071832176295, "learning_rate": 8.81965204460335e-06, "loss": 17.5496, "step": 13490 }, { "epoch": 0.24660463926005813, "grad_norm": 6.078233706862764, "learning_rate": 8.81946102206782e-06, "loss": 17.4564, "step": 13491 }, { "epoch": 0.24662291845650466, "grad_norm": 8.38149022730245, "learning_rate": 8.819269986145407e-06, "loss": 18.0966, "step": 13492 }, { "epoch": 0.24664119765295117, "grad_norm": 6.9040545767910455, "learning_rate": 8.819078936836786e-06, "loss": 17.6468, "step": 13493 }, { "epoch": 0.2466594768493977, "grad_norm": 6.477670606043992, "learning_rate": 8.818887874142625e-06, "loss": 17.3782, "step": 13494 }, { "epoch": 0.24667775604584422, "grad_norm": 6.599624140942617, "learning_rate": 8.818696798063594e-06, "loss": 17.4778, "step": 13495 }, { "epoch": 0.24669603524229075, "grad_norm": 6.918902171740237, "learning_rate": 8.818505708600363e-06, "loss": 18.0571, "step": 13496 }, { "epoch": 0.2467143144387373, "grad_norm": 6.167114363379106, "learning_rate": 8.818314605753598e-06, "loss": 17.5789, "step": 13497 }, { "epoch": 0.2467325936351838, "grad_norm": 5.4991456389946025, "learning_rate": 8.818123489523973e-06, "loss": 17.0248, "step": 13498 }, { "epoch": 0.24675087283163033, "grad_norm": 10.464567584802637, "learning_rate": 8.817932359912156e-06, "loss": 17.6705, "step": 13499 }, { "epoch": 0.24676915202807684, "grad_norm": 6.058815033485125, "learning_rate": 8.817741216918818e-06, "loss": 17.3583, "step": 13500 }, { "epoch": 0.24678743122452337, "grad_norm": 7.607334550198356, "learning_rate": 8.81755006054463e-06, "loss": 17.8198, "step": 13501 }, { "epoch": 0.24680571042096988, "grad_norm": 6.781707046645076, "learning_rate": 8.817358890790256e-06, "loss": 17.5437, "step": 13502 }, { "epoch": 0.24682398961741642, "grad_norm": 6.236260480634458, "learning_rate": 8.817167707656373e-06, "loss": 17.4142, "step": 13503 }, { "epoch": 0.24684226881386295, "grad_norm": 7.156356990978949, "learning_rate": 8.816976511143648e-06, "loss": 17.8336, "step": 13504 }, { "epoch": 0.24686054801030946, "grad_norm": 7.385429118551683, "learning_rate": 8.816785301252752e-06, "loss": 17.8506, "step": 13505 }, { "epoch": 0.246878827206756, "grad_norm": 7.008744010798375, "learning_rate": 8.816594077984355e-06, "loss": 17.7951, "step": 13506 }, { "epoch": 0.2468971064032025, "grad_norm": 6.027277764001174, "learning_rate": 8.816402841339125e-06, "loss": 17.3543, "step": 13507 }, { "epoch": 0.24691538559964904, "grad_norm": 7.306801806072262, "learning_rate": 8.816211591317736e-06, "loss": 18.0441, "step": 13508 }, { "epoch": 0.24693366479609558, "grad_norm": 8.105265740723043, "learning_rate": 8.816020327920855e-06, "loss": 18.2235, "step": 13509 }, { "epoch": 0.24695194399254208, "grad_norm": 6.669435866966423, "learning_rate": 8.815829051149156e-06, "loss": 17.3582, "step": 13510 }, { "epoch": 0.24697022318898862, "grad_norm": 8.120380399622045, "learning_rate": 8.815637761003306e-06, "loss": 17.9397, "step": 13511 }, { "epoch": 0.24698850238543513, "grad_norm": 7.940693990461981, "learning_rate": 8.815446457483977e-06, "loss": 17.9778, "step": 13512 }, { "epoch": 0.24700678158188166, "grad_norm": 6.139624392103352, "learning_rate": 8.81525514059184e-06, "loss": 17.4869, "step": 13513 }, { "epoch": 0.2470250607783282, "grad_norm": 6.140321004105239, "learning_rate": 8.815063810327564e-06, "loss": 17.4739, "step": 13514 }, { "epoch": 0.2470433399747747, "grad_norm": 7.220012039096887, "learning_rate": 8.81487246669182e-06, "loss": 17.9978, "step": 13515 }, { "epoch": 0.24706161917122124, "grad_norm": 6.052610323392659, "learning_rate": 8.81468110968528e-06, "loss": 17.4726, "step": 13516 }, { "epoch": 0.24707989836766775, "grad_norm": 7.2392828573847945, "learning_rate": 8.814489739308613e-06, "loss": 17.749, "step": 13517 }, { "epoch": 0.24709817756411429, "grad_norm": 9.178255731900403, "learning_rate": 8.81429835556249e-06, "loss": 18.6995, "step": 13518 }, { "epoch": 0.2471164567605608, "grad_norm": 6.151055631501488, "learning_rate": 8.814106958447584e-06, "loss": 17.5311, "step": 13519 }, { "epoch": 0.24713473595700733, "grad_norm": 6.351774958778554, "learning_rate": 8.813915547964561e-06, "loss": 17.5953, "step": 13520 }, { "epoch": 0.24715301515345386, "grad_norm": 6.20297852456471, "learning_rate": 8.813724124114099e-06, "loss": 17.3503, "step": 13521 }, { "epoch": 0.24717129434990037, "grad_norm": 7.4313810432804575, "learning_rate": 8.813532686896861e-06, "loss": 18.0759, "step": 13522 }, { "epoch": 0.2471895735463469, "grad_norm": 5.945238925825888, "learning_rate": 8.813341236313527e-06, "loss": 17.5475, "step": 13523 }, { "epoch": 0.24720785274279342, "grad_norm": 8.513827079892803, "learning_rate": 8.813149772364758e-06, "loss": 18.2024, "step": 13524 }, { "epoch": 0.24722613193923995, "grad_norm": 5.6187932784378125, "learning_rate": 8.812958295051232e-06, "loss": 17.0056, "step": 13525 }, { "epoch": 0.2472444111356865, "grad_norm": 7.632836096581084, "learning_rate": 8.812766804373617e-06, "loss": 18.03, "step": 13526 }, { "epoch": 0.247262690332133, "grad_norm": 6.981690508258919, "learning_rate": 8.812575300332587e-06, "loss": 17.6715, "step": 13527 }, { "epoch": 0.24728096952857953, "grad_norm": 8.709129676610265, "learning_rate": 8.81238378292881e-06, "loss": 18.5582, "step": 13528 }, { "epoch": 0.24729924872502604, "grad_norm": 6.365832607157043, "learning_rate": 8.812192252162958e-06, "loss": 17.5791, "step": 13529 }, { "epoch": 0.24731752792147257, "grad_norm": 6.899931755926244, "learning_rate": 8.812000708035704e-06, "loss": 17.9722, "step": 13530 }, { "epoch": 0.2473358071179191, "grad_norm": 7.397138075266554, "learning_rate": 8.811809150547718e-06, "loss": 17.7979, "step": 13531 }, { "epoch": 0.24735408631436562, "grad_norm": 6.563167452099872, "learning_rate": 8.811617579699671e-06, "loss": 17.4955, "step": 13532 }, { "epoch": 0.24737236551081215, "grad_norm": 6.41796307114215, "learning_rate": 8.811425995492238e-06, "loss": 17.4837, "step": 13533 }, { "epoch": 0.24739064470725866, "grad_norm": 7.329600825860249, "learning_rate": 8.811234397926085e-06, "loss": 17.8683, "step": 13534 }, { "epoch": 0.2474089239037052, "grad_norm": 7.651120667298316, "learning_rate": 8.811042787001887e-06, "loss": 17.6583, "step": 13535 }, { "epoch": 0.2474272031001517, "grad_norm": 6.719738297247194, "learning_rate": 8.810851162720315e-06, "loss": 17.9221, "step": 13536 }, { "epoch": 0.24744548229659824, "grad_norm": 5.9192372353376745, "learning_rate": 8.81065952508204e-06, "loss": 17.3504, "step": 13537 }, { "epoch": 0.24746376149304478, "grad_norm": 6.505587578775084, "learning_rate": 8.810467874087733e-06, "loss": 17.4247, "step": 13538 }, { "epoch": 0.24748204068949128, "grad_norm": 6.073858449932765, "learning_rate": 8.810276209738069e-06, "loss": 17.3974, "step": 13539 }, { "epoch": 0.24750031988593782, "grad_norm": 7.17598401883612, "learning_rate": 8.810084532033715e-06, "loss": 17.7629, "step": 13540 }, { "epoch": 0.24751859908238433, "grad_norm": 6.072667004573831, "learning_rate": 8.809892840975347e-06, "loss": 17.5451, "step": 13541 }, { "epoch": 0.24753687827883086, "grad_norm": 5.8307657994823545, "learning_rate": 8.809701136563635e-06, "loss": 17.2472, "step": 13542 }, { "epoch": 0.2475551574752774, "grad_norm": 7.9425616507954215, "learning_rate": 8.80950941879925e-06, "loss": 18.4223, "step": 13543 }, { "epoch": 0.2475734366717239, "grad_norm": 6.61336833311643, "learning_rate": 8.809317687682865e-06, "loss": 17.6788, "step": 13544 }, { "epoch": 0.24759171586817044, "grad_norm": 7.353439073462062, "learning_rate": 8.809125943215153e-06, "loss": 17.7704, "step": 13545 }, { "epoch": 0.24760999506461695, "grad_norm": 6.87891029506922, "learning_rate": 8.808934185396787e-06, "loss": 17.5211, "step": 13546 }, { "epoch": 0.24762827426106349, "grad_norm": 7.2274130161211385, "learning_rate": 8.808742414228435e-06, "loss": 17.3957, "step": 13547 }, { "epoch": 0.24764655345751002, "grad_norm": 6.5308196450905225, "learning_rate": 8.808550629710772e-06, "loss": 17.7209, "step": 13548 }, { "epoch": 0.24766483265395653, "grad_norm": 5.982434903221757, "learning_rate": 8.808358831844468e-06, "loss": 17.1228, "step": 13549 }, { "epoch": 0.24768311185040306, "grad_norm": 6.148208212333493, "learning_rate": 8.808167020630198e-06, "loss": 17.524, "step": 13550 }, { "epoch": 0.24770139104684957, "grad_norm": 6.485957048327351, "learning_rate": 8.807975196068633e-06, "loss": 17.6687, "step": 13551 }, { "epoch": 0.2477196702432961, "grad_norm": 5.341858827419484, "learning_rate": 8.807783358160447e-06, "loss": 17.0652, "step": 13552 }, { "epoch": 0.24773794943974262, "grad_norm": 8.274691688700468, "learning_rate": 8.807591506906307e-06, "loss": 18.3032, "step": 13553 }, { "epoch": 0.24775622863618915, "grad_norm": 6.638551424156307, "learning_rate": 8.807399642306894e-06, "loss": 17.8078, "step": 13554 }, { "epoch": 0.2477745078326357, "grad_norm": 5.17861307736317, "learning_rate": 8.80720776436287e-06, "loss": 17.094, "step": 13555 }, { "epoch": 0.2477927870290822, "grad_norm": 7.188883248739621, "learning_rate": 8.807015873074918e-06, "loss": 17.9123, "step": 13556 }, { "epoch": 0.24781106622552873, "grad_norm": 6.701056752872817, "learning_rate": 8.806823968443704e-06, "loss": 17.7605, "step": 13557 }, { "epoch": 0.24782934542197524, "grad_norm": 6.839555891126196, "learning_rate": 8.806632050469901e-06, "loss": 17.6233, "step": 13558 }, { "epoch": 0.24784762461842177, "grad_norm": 7.8974902605052035, "learning_rate": 8.806440119154185e-06, "loss": 18.1118, "step": 13559 }, { "epoch": 0.2478659038148683, "grad_norm": 5.850233448254094, "learning_rate": 8.806248174497225e-06, "loss": 17.1914, "step": 13560 }, { "epoch": 0.24788418301131482, "grad_norm": 6.1803635027236, "learning_rate": 8.806056216499697e-06, "loss": 17.3134, "step": 13561 }, { "epoch": 0.24790246220776135, "grad_norm": 7.44515695266595, "learning_rate": 8.805864245162272e-06, "loss": 17.8741, "step": 13562 }, { "epoch": 0.24792074140420786, "grad_norm": 6.730564786986195, "learning_rate": 8.805672260485623e-06, "loss": 17.6039, "step": 13563 }, { "epoch": 0.2479390206006544, "grad_norm": 6.6264531741244195, "learning_rate": 8.805480262470422e-06, "loss": 17.7734, "step": 13564 }, { "epoch": 0.24795729979710093, "grad_norm": 6.879737015030938, "learning_rate": 8.805288251117343e-06, "loss": 17.3106, "step": 13565 }, { "epoch": 0.24797557899354744, "grad_norm": 6.596703873166253, "learning_rate": 8.805096226427059e-06, "loss": 17.6785, "step": 13566 }, { "epoch": 0.24799385818999398, "grad_norm": 6.330324217831363, "learning_rate": 8.804904188400243e-06, "loss": 17.4953, "step": 13567 }, { "epoch": 0.24801213738644048, "grad_norm": 6.476138941986639, "learning_rate": 8.80471213703757e-06, "loss": 17.8462, "step": 13568 }, { "epoch": 0.24803041658288702, "grad_norm": 7.202468307013269, "learning_rate": 8.804520072339709e-06, "loss": 17.9772, "step": 13569 }, { "epoch": 0.24804869577933353, "grad_norm": 6.487313451592617, "learning_rate": 8.804327994307335e-06, "loss": 17.4103, "step": 13570 }, { "epoch": 0.24806697497578006, "grad_norm": 5.396480993896546, "learning_rate": 8.804135902941121e-06, "loss": 16.9656, "step": 13571 }, { "epoch": 0.2480852541722266, "grad_norm": 6.209982091023376, "learning_rate": 8.803943798241744e-06, "loss": 17.1646, "step": 13572 }, { "epoch": 0.2481035333686731, "grad_norm": 6.259138045292799, "learning_rate": 8.80375168020987e-06, "loss": 17.5918, "step": 13573 }, { "epoch": 0.24812181256511964, "grad_norm": 6.4363935474218, "learning_rate": 8.803559548846178e-06, "loss": 17.6681, "step": 13574 }, { "epoch": 0.24814009176156615, "grad_norm": 8.033724951282872, "learning_rate": 8.803367404151341e-06, "loss": 18.1917, "step": 13575 }, { "epoch": 0.2481583709580127, "grad_norm": 8.919849199637072, "learning_rate": 8.803175246126032e-06, "loss": 18.8369, "step": 13576 }, { "epoch": 0.24817665015445922, "grad_norm": 8.243067495335772, "learning_rate": 8.802983074770922e-06, "loss": 18.2087, "step": 13577 }, { "epoch": 0.24819492935090573, "grad_norm": 6.223858430854021, "learning_rate": 8.802790890086686e-06, "loss": 17.5654, "step": 13578 }, { "epoch": 0.24821320854735227, "grad_norm": 7.387343307779693, "learning_rate": 8.802598692074e-06, "loss": 18.1791, "step": 13579 }, { "epoch": 0.24823148774379877, "grad_norm": 6.975913950209664, "learning_rate": 8.802406480733534e-06, "loss": 17.8566, "step": 13580 }, { "epoch": 0.2482497669402453, "grad_norm": 5.483926267998872, "learning_rate": 8.802214256065963e-06, "loss": 17.0997, "step": 13581 }, { "epoch": 0.24826804613669184, "grad_norm": 7.6291144664324415, "learning_rate": 8.802022018071961e-06, "loss": 18.0416, "step": 13582 }, { "epoch": 0.24828632533313835, "grad_norm": 6.39217728338996, "learning_rate": 8.801829766752203e-06, "loss": 17.6541, "step": 13583 }, { "epoch": 0.2483046045295849, "grad_norm": 6.595641814029959, "learning_rate": 8.80163750210736e-06, "loss": 17.8408, "step": 13584 }, { "epoch": 0.2483228837260314, "grad_norm": 7.254710124077112, "learning_rate": 8.80144522413811e-06, "loss": 17.797, "step": 13585 }, { "epoch": 0.24834116292247793, "grad_norm": 6.5113618090311345, "learning_rate": 8.801252932845122e-06, "loss": 17.67, "step": 13586 }, { "epoch": 0.24835944211892444, "grad_norm": 6.110250760314191, "learning_rate": 8.801060628229072e-06, "loss": 17.4549, "step": 13587 }, { "epoch": 0.24837772131537098, "grad_norm": 7.17153914014889, "learning_rate": 8.800868310290635e-06, "loss": 17.944, "step": 13588 }, { "epoch": 0.2483960005118175, "grad_norm": 6.6406924892044685, "learning_rate": 8.800675979030484e-06, "loss": 17.746, "step": 13589 }, { "epoch": 0.24841427970826402, "grad_norm": 7.9014519895766355, "learning_rate": 8.800483634449295e-06, "loss": 18.5655, "step": 13590 }, { "epoch": 0.24843255890471055, "grad_norm": 6.178339948853413, "learning_rate": 8.80029127654774e-06, "loss": 17.3984, "step": 13591 }, { "epoch": 0.24845083810115706, "grad_norm": 7.3444746228631095, "learning_rate": 8.800098905326493e-06, "loss": 18.1451, "step": 13592 }, { "epoch": 0.2484691172976036, "grad_norm": 7.1783347093880945, "learning_rate": 8.79990652078623e-06, "loss": 17.7128, "step": 13593 }, { "epoch": 0.24848739649405013, "grad_norm": 6.075669041088203, "learning_rate": 8.799714122927625e-06, "loss": 17.5606, "step": 13594 }, { "epoch": 0.24850567569049664, "grad_norm": 6.382625845225516, "learning_rate": 8.79952171175135e-06, "loss": 17.4704, "step": 13595 }, { "epoch": 0.24852395488694318, "grad_norm": 6.3752340265404355, "learning_rate": 8.799329287258083e-06, "loss": 17.5366, "step": 13596 }, { "epoch": 0.24854223408338968, "grad_norm": 6.915529798093468, "learning_rate": 8.799136849448496e-06, "loss": 17.659, "step": 13597 }, { "epoch": 0.24856051327983622, "grad_norm": 6.612852806965295, "learning_rate": 8.798944398323261e-06, "loss": 17.8432, "step": 13598 }, { "epoch": 0.24857879247628276, "grad_norm": 6.2407458376604055, "learning_rate": 8.798751933883058e-06, "loss": 17.5232, "step": 13599 }, { "epoch": 0.24859707167272926, "grad_norm": 6.711701644131944, "learning_rate": 8.79855945612856e-06, "loss": 17.7623, "step": 13600 }, { "epoch": 0.2486153508691758, "grad_norm": 7.128477037322055, "learning_rate": 8.79836696506044e-06, "loss": 18.0356, "step": 13601 }, { "epoch": 0.2486336300656223, "grad_norm": 5.804775169214023, "learning_rate": 8.798174460679374e-06, "loss": 17.2787, "step": 13602 }, { "epoch": 0.24865190926206884, "grad_norm": 5.70416433023589, "learning_rate": 8.797981942986035e-06, "loss": 17.5225, "step": 13603 }, { "epoch": 0.24867018845851535, "grad_norm": 7.346183355605944, "learning_rate": 8.797789411981098e-06, "loss": 18.0112, "step": 13604 }, { "epoch": 0.2486884676549619, "grad_norm": 8.26468927004845, "learning_rate": 8.797596867665241e-06, "loss": 18.4587, "step": 13605 }, { "epoch": 0.24870674685140842, "grad_norm": 8.255270705426234, "learning_rate": 8.797404310039135e-06, "loss": 18.4868, "step": 13606 }, { "epoch": 0.24872502604785493, "grad_norm": 8.923343128536514, "learning_rate": 8.797211739103458e-06, "loss": 18.3208, "step": 13607 }, { "epoch": 0.24874330524430147, "grad_norm": 5.397304363700296, "learning_rate": 8.797019154858881e-06, "loss": 16.9809, "step": 13608 }, { "epoch": 0.24876158444074797, "grad_norm": 7.430666195594035, "learning_rate": 8.796826557306083e-06, "loss": 18.066, "step": 13609 }, { "epoch": 0.2487798636371945, "grad_norm": 6.003755063812365, "learning_rate": 8.796633946445737e-06, "loss": 17.4161, "step": 13610 }, { "epoch": 0.24879814283364104, "grad_norm": 5.620294923954437, "learning_rate": 8.796441322278518e-06, "loss": 17.3661, "step": 13611 }, { "epoch": 0.24881642203008755, "grad_norm": 5.7806892935003615, "learning_rate": 8.796248684805103e-06, "loss": 17.2277, "step": 13612 }, { "epoch": 0.2488347012265341, "grad_norm": 6.581495995371712, "learning_rate": 8.796056034026164e-06, "loss": 17.485, "step": 13613 }, { "epoch": 0.2488529804229806, "grad_norm": 7.568553920028167, "learning_rate": 8.795863369942379e-06, "loss": 18.2171, "step": 13614 }, { "epoch": 0.24887125961942713, "grad_norm": 12.072031985212378, "learning_rate": 8.795670692554422e-06, "loss": 18.5831, "step": 13615 }, { "epoch": 0.24888953881587367, "grad_norm": 6.5694429939948185, "learning_rate": 8.79547800186297e-06, "loss": 17.3885, "step": 13616 }, { "epoch": 0.24890781801232018, "grad_norm": 7.464572581084682, "learning_rate": 8.795285297868695e-06, "loss": 18.21, "step": 13617 }, { "epoch": 0.2489260972087667, "grad_norm": 7.097192977191152, "learning_rate": 8.795092580572274e-06, "loss": 17.9665, "step": 13618 }, { "epoch": 0.24894437640521322, "grad_norm": 5.887254196280174, "learning_rate": 8.794899849974384e-06, "loss": 17.5091, "step": 13619 }, { "epoch": 0.24896265560165975, "grad_norm": 6.292638117710199, "learning_rate": 8.7947071060757e-06, "loss": 17.9647, "step": 13620 }, { "epoch": 0.24898093479810626, "grad_norm": 5.610482638041628, "learning_rate": 8.794514348876894e-06, "loss": 17.3419, "step": 13621 }, { "epoch": 0.2489992139945528, "grad_norm": 6.985345441765878, "learning_rate": 8.794321578378649e-06, "loss": 17.9621, "step": 13622 }, { "epoch": 0.24901749319099933, "grad_norm": 7.2929065272559646, "learning_rate": 8.794128794581634e-06, "loss": 18.2403, "step": 13623 }, { "epoch": 0.24903577238744584, "grad_norm": 6.574658771008031, "learning_rate": 8.793935997486525e-06, "loss": 17.8019, "step": 13624 }, { "epoch": 0.24905405158389238, "grad_norm": 5.88672130310422, "learning_rate": 8.793743187094002e-06, "loss": 17.4474, "step": 13625 }, { "epoch": 0.24907233078033889, "grad_norm": 8.240587812286298, "learning_rate": 8.793550363404737e-06, "loss": 18.3002, "step": 13626 }, { "epoch": 0.24909060997678542, "grad_norm": 7.550864745136357, "learning_rate": 8.793357526419406e-06, "loss": 17.6373, "step": 13627 }, { "epoch": 0.24910888917323196, "grad_norm": 7.53562386247126, "learning_rate": 8.793164676138687e-06, "loss": 17.9381, "step": 13628 }, { "epoch": 0.24912716836967846, "grad_norm": 6.208485718974186, "learning_rate": 8.792971812563258e-06, "loss": 17.4192, "step": 13629 }, { "epoch": 0.249145447566125, "grad_norm": 6.042275434645566, "learning_rate": 8.792778935693788e-06, "loss": 17.1798, "step": 13630 }, { "epoch": 0.2491637267625715, "grad_norm": 7.903588151094938, "learning_rate": 8.792586045530958e-06, "loss": 18.0765, "step": 13631 }, { "epoch": 0.24918200595901804, "grad_norm": 6.725425760282418, "learning_rate": 8.792393142075443e-06, "loss": 17.4452, "step": 13632 }, { "epoch": 0.24920028515546458, "grad_norm": 7.613950756375264, "learning_rate": 8.79220022532792e-06, "loss": 17.8006, "step": 13633 }, { "epoch": 0.2492185643519111, "grad_norm": 6.311147051785585, "learning_rate": 8.792007295289064e-06, "loss": 17.6108, "step": 13634 }, { "epoch": 0.24923684354835762, "grad_norm": 8.6277951319873, "learning_rate": 8.791814351959551e-06, "loss": 17.8419, "step": 13635 }, { "epoch": 0.24925512274480413, "grad_norm": 6.009905504629565, "learning_rate": 8.79162139534006e-06, "loss": 17.4084, "step": 13636 }, { "epoch": 0.24927340194125067, "grad_norm": 7.029158841169786, "learning_rate": 8.791428425431263e-06, "loss": 17.9272, "step": 13637 }, { "epoch": 0.24929168113769717, "grad_norm": 9.781101938345776, "learning_rate": 8.791235442233837e-06, "loss": 18.2717, "step": 13638 }, { "epoch": 0.2493099603341437, "grad_norm": 8.353990401220457, "learning_rate": 8.791042445748462e-06, "loss": 18.5817, "step": 13639 }, { "epoch": 0.24932823953059025, "grad_norm": 6.6549857354042885, "learning_rate": 8.790849435975813e-06, "loss": 17.6618, "step": 13640 }, { "epoch": 0.24934651872703675, "grad_norm": 6.2320122434893594, "learning_rate": 8.790656412916563e-06, "loss": 17.4033, "step": 13641 }, { "epoch": 0.2493647979234833, "grad_norm": 7.176249965546837, "learning_rate": 8.790463376571392e-06, "loss": 17.7429, "step": 13642 }, { "epoch": 0.2493830771199298, "grad_norm": 5.734651787696095, "learning_rate": 8.790270326940976e-06, "loss": 17.1494, "step": 13643 }, { "epoch": 0.24940135631637633, "grad_norm": 6.9045363113073455, "learning_rate": 8.790077264025992e-06, "loss": 17.9641, "step": 13644 }, { "epoch": 0.24941963551282287, "grad_norm": 5.184670303198655, "learning_rate": 8.789884187827116e-06, "loss": 17.0292, "step": 13645 }, { "epoch": 0.24943791470926938, "grad_norm": 7.711816299566391, "learning_rate": 8.789691098345023e-06, "loss": 17.9144, "step": 13646 }, { "epoch": 0.2494561939057159, "grad_norm": 7.719521491741951, "learning_rate": 8.789497995580395e-06, "loss": 18.3423, "step": 13647 }, { "epoch": 0.24947447310216242, "grad_norm": 5.627474291296823, "learning_rate": 8.789304879533901e-06, "loss": 17.2978, "step": 13648 }, { "epoch": 0.24949275229860896, "grad_norm": 6.158697685436154, "learning_rate": 8.789111750206224e-06, "loss": 17.36, "step": 13649 }, { "epoch": 0.2495110314950555, "grad_norm": 6.65429193676253, "learning_rate": 8.78891860759804e-06, "loss": 17.7113, "step": 13650 }, { "epoch": 0.249529310691502, "grad_norm": 5.166960750257122, "learning_rate": 8.788725451710026e-06, "loss": 16.8692, "step": 13651 }, { "epoch": 0.24954758988794853, "grad_norm": 6.376480383954239, "learning_rate": 8.788532282542857e-06, "loss": 17.4281, "step": 13652 }, { "epoch": 0.24956586908439504, "grad_norm": 5.847633936487089, "learning_rate": 8.788339100097209e-06, "loss": 17.232, "step": 13653 }, { "epoch": 0.24958414828084158, "grad_norm": 8.168623132831579, "learning_rate": 8.788145904373765e-06, "loss": 18.1684, "step": 13654 }, { "epoch": 0.24960242747728809, "grad_norm": 7.436815384741335, "learning_rate": 8.787952695373197e-06, "loss": 18.0997, "step": 13655 }, { "epoch": 0.24962070667373462, "grad_norm": 5.440164677593967, "learning_rate": 8.787759473096182e-06, "loss": 17.3317, "step": 13656 }, { "epoch": 0.24963898587018116, "grad_norm": 7.515954411219964, "learning_rate": 8.7875662375434e-06, "loss": 17.7748, "step": 13657 }, { "epoch": 0.24965726506662766, "grad_norm": 7.113133122298919, "learning_rate": 8.787372988715525e-06, "loss": 18.1626, "step": 13658 }, { "epoch": 0.2496755442630742, "grad_norm": 7.010804874667533, "learning_rate": 8.78717972661324e-06, "loss": 17.7421, "step": 13659 }, { "epoch": 0.2496938234595207, "grad_norm": 5.665932415627742, "learning_rate": 8.786986451237217e-06, "loss": 17.1337, "step": 13660 }, { "epoch": 0.24971210265596724, "grad_norm": 7.371647518398055, "learning_rate": 8.786793162588135e-06, "loss": 18.1092, "step": 13661 }, { "epoch": 0.24973038185241378, "grad_norm": 5.964545631329175, "learning_rate": 8.78659986066667e-06, "loss": 17.1018, "step": 13662 }, { "epoch": 0.2497486610488603, "grad_norm": 6.145087876449944, "learning_rate": 8.786406545473503e-06, "loss": 17.179, "step": 13663 }, { "epoch": 0.24976694024530682, "grad_norm": 7.724709929005375, "learning_rate": 8.786213217009309e-06, "loss": 17.7277, "step": 13664 }, { "epoch": 0.24978521944175333, "grad_norm": 5.767009279703939, "learning_rate": 8.786019875274764e-06, "loss": 17.0923, "step": 13665 }, { "epoch": 0.24980349863819987, "grad_norm": 5.577553796148244, "learning_rate": 8.785826520270553e-06, "loss": 17.1695, "step": 13666 }, { "epoch": 0.2498217778346464, "grad_norm": 7.324165428076282, "learning_rate": 8.785633151997343e-06, "loss": 17.6439, "step": 13667 }, { "epoch": 0.2498400570310929, "grad_norm": 7.22453251197266, "learning_rate": 8.785439770455821e-06, "loss": 17.5587, "step": 13668 }, { "epoch": 0.24985833622753945, "grad_norm": 6.3350418539347215, "learning_rate": 8.785246375646662e-06, "loss": 17.3439, "step": 13669 }, { "epoch": 0.24987661542398595, "grad_norm": 8.105900333242337, "learning_rate": 8.78505296757054e-06, "loss": 18.0721, "step": 13670 }, { "epoch": 0.2498948946204325, "grad_norm": 6.073732518571119, "learning_rate": 8.784859546228136e-06, "loss": 17.3716, "step": 13671 }, { "epoch": 0.249913173816879, "grad_norm": 7.028398136879655, "learning_rate": 8.78466611162013e-06, "loss": 17.7907, "step": 13672 }, { "epoch": 0.24993145301332553, "grad_norm": 7.816419386903626, "learning_rate": 8.784472663747195e-06, "loss": 17.9682, "step": 13673 }, { "epoch": 0.24994973220977207, "grad_norm": 7.585974508246072, "learning_rate": 8.784279202610012e-06, "loss": 17.9536, "step": 13674 }, { "epoch": 0.24996801140621858, "grad_norm": 6.368820454232451, "learning_rate": 8.784085728209261e-06, "loss": 17.5333, "step": 13675 }, { "epoch": 0.2499862906026651, "grad_norm": 6.801929375828293, "learning_rate": 8.783892240545618e-06, "loss": 17.6958, "step": 13676 }, { "epoch": 0.2500045697991116, "grad_norm": 7.71347140343787, "learning_rate": 8.783698739619759e-06, "loss": 18.0251, "step": 13677 }, { "epoch": 0.25002284899555816, "grad_norm": 7.172448762691464, "learning_rate": 8.783505225432364e-06, "loss": 17.8857, "step": 13678 }, { "epoch": 0.2500411281920047, "grad_norm": 7.8308999600022835, "learning_rate": 8.783311697984113e-06, "loss": 18.3854, "step": 13679 }, { "epoch": 0.2500594073884512, "grad_norm": 7.909860254259011, "learning_rate": 8.783118157275683e-06, "loss": 18.1653, "step": 13680 }, { "epoch": 0.2500776865848977, "grad_norm": 7.399177070632512, "learning_rate": 8.78292460330775e-06, "loss": 18.1998, "step": 13681 }, { "epoch": 0.25009596578134424, "grad_norm": 5.943759962856429, "learning_rate": 8.782731036080996e-06, "loss": 17.5429, "step": 13682 }, { "epoch": 0.2501142449777908, "grad_norm": 7.135033931021666, "learning_rate": 8.782537455596099e-06, "loss": 17.7688, "step": 13683 }, { "epoch": 0.2501325241742373, "grad_norm": 6.35122949967104, "learning_rate": 8.782343861853735e-06, "loss": 17.3839, "step": 13684 }, { "epoch": 0.25015080337068385, "grad_norm": 9.789846079296307, "learning_rate": 8.782150254854584e-06, "loss": 18.6432, "step": 13685 }, { "epoch": 0.25016908256713033, "grad_norm": 7.367962037787293, "learning_rate": 8.781956634599325e-06, "loss": 18.0811, "step": 13686 }, { "epoch": 0.25018736176357687, "grad_norm": 8.561889029472841, "learning_rate": 8.781763001088636e-06, "loss": 18.1209, "step": 13687 }, { "epoch": 0.2502056409600234, "grad_norm": 6.019826809806316, "learning_rate": 8.781569354323197e-06, "loss": 17.1458, "step": 13688 }, { "epoch": 0.25022392015646994, "grad_norm": 6.142316335916776, "learning_rate": 8.781375694303683e-06, "loss": 17.2524, "step": 13689 }, { "epoch": 0.2502421993529165, "grad_norm": 6.430875967650894, "learning_rate": 8.781182021030777e-06, "loss": 17.3811, "step": 13690 }, { "epoch": 0.25026047854936295, "grad_norm": 6.6397344390401845, "learning_rate": 8.780988334505156e-06, "loss": 17.4814, "step": 13691 }, { "epoch": 0.2502787577458095, "grad_norm": 6.656274131397097, "learning_rate": 8.7807946347275e-06, "loss": 17.7153, "step": 13692 }, { "epoch": 0.250297036942256, "grad_norm": 6.486317260392342, "learning_rate": 8.780600921698485e-06, "loss": 17.6734, "step": 13693 }, { "epoch": 0.25031531613870256, "grad_norm": 6.526161603655303, "learning_rate": 8.780407195418792e-06, "loss": 17.3982, "step": 13694 }, { "epoch": 0.25033359533514904, "grad_norm": 6.3537219414511, "learning_rate": 8.7802134558891e-06, "loss": 17.364, "step": 13695 }, { "epoch": 0.2503518745315956, "grad_norm": 6.729428797262897, "learning_rate": 8.78001970311009e-06, "loss": 17.9462, "step": 13696 }, { "epoch": 0.2503701537280421, "grad_norm": 6.65367901168732, "learning_rate": 8.779825937082436e-06, "loss": 17.4791, "step": 13697 }, { "epoch": 0.25038843292448865, "grad_norm": 5.8306443302054385, "learning_rate": 8.779632157806821e-06, "loss": 17.4159, "step": 13698 }, { "epoch": 0.2504067121209352, "grad_norm": 6.171992721075378, "learning_rate": 8.779438365283924e-06, "loss": 17.364, "step": 13699 }, { "epoch": 0.25042499131738166, "grad_norm": 7.253356684839225, "learning_rate": 8.779244559514424e-06, "loss": 17.9746, "step": 13700 }, { "epoch": 0.2504432705138282, "grad_norm": 6.053186052422697, "learning_rate": 8.779050740498998e-06, "loss": 17.4817, "step": 13701 }, { "epoch": 0.25046154971027473, "grad_norm": 6.230493315371202, "learning_rate": 8.77885690823833e-06, "loss": 17.1891, "step": 13702 }, { "epoch": 0.25047982890672127, "grad_norm": 6.227282377287225, "learning_rate": 8.778663062733093e-06, "loss": 17.286, "step": 13703 }, { "epoch": 0.2504981081031678, "grad_norm": 7.03631259111469, "learning_rate": 8.778469203983971e-06, "loss": 17.4506, "step": 13704 }, { "epoch": 0.2505163872996143, "grad_norm": 6.550702438917967, "learning_rate": 8.778275331991643e-06, "loss": 17.6496, "step": 13705 }, { "epoch": 0.2505346664960608, "grad_norm": 6.766698513387287, "learning_rate": 8.778081446756787e-06, "loss": 17.5792, "step": 13706 }, { "epoch": 0.25055294569250736, "grad_norm": 6.666839859058256, "learning_rate": 8.777887548280084e-06, "loss": 17.4515, "step": 13707 }, { "epoch": 0.2505712248889539, "grad_norm": 6.154213037421661, "learning_rate": 8.777693636562212e-06, "loss": 17.2226, "step": 13708 }, { "epoch": 0.2505895040854004, "grad_norm": 8.374144174894667, "learning_rate": 8.777499711603854e-06, "loss": 18.1339, "step": 13709 }, { "epoch": 0.2506077832818469, "grad_norm": 7.375942271841453, "learning_rate": 8.777305773405684e-06, "loss": 17.7976, "step": 13710 }, { "epoch": 0.25062606247829344, "grad_norm": 7.844301927949076, "learning_rate": 8.777111821968386e-06, "loss": 18.0166, "step": 13711 }, { "epoch": 0.25064434167474, "grad_norm": 6.245290524021204, "learning_rate": 8.776917857292641e-06, "loss": 17.3728, "step": 13712 }, { "epoch": 0.2506626208711865, "grad_norm": 7.027406029170456, "learning_rate": 8.776723879379126e-06, "loss": 18.0623, "step": 13713 }, { "epoch": 0.25068090006763305, "grad_norm": 6.170920509560837, "learning_rate": 8.77652988822852e-06, "loss": 17.3041, "step": 13714 }, { "epoch": 0.25069917926407953, "grad_norm": 7.877857309985771, "learning_rate": 8.776335883841504e-06, "loss": 18.2322, "step": 13715 }, { "epoch": 0.25071745846052607, "grad_norm": 7.326621885665798, "learning_rate": 8.776141866218761e-06, "loss": 17.8354, "step": 13716 }, { "epoch": 0.2507357376569726, "grad_norm": 7.026959857664446, "learning_rate": 8.775947835360967e-06, "loss": 17.7837, "step": 13717 }, { "epoch": 0.25075401685341914, "grad_norm": 7.637534254447708, "learning_rate": 8.775753791268804e-06, "loss": 18.2689, "step": 13718 }, { "epoch": 0.2507722960498657, "grad_norm": 7.447805613172726, "learning_rate": 8.775559733942952e-06, "loss": 17.8548, "step": 13719 }, { "epoch": 0.25079057524631215, "grad_norm": 6.978672598215624, "learning_rate": 8.775365663384088e-06, "loss": 17.8056, "step": 13720 }, { "epoch": 0.2508088544427587, "grad_norm": 6.789059085714313, "learning_rate": 8.775171579592898e-06, "loss": 17.8224, "step": 13721 }, { "epoch": 0.2508271336392052, "grad_norm": 6.528536194001972, "learning_rate": 8.774977482570058e-06, "loss": 17.9032, "step": 13722 }, { "epoch": 0.25084541283565176, "grad_norm": 5.894994552697368, "learning_rate": 8.77478337231625e-06, "loss": 17.3327, "step": 13723 }, { "epoch": 0.2508636920320983, "grad_norm": 6.889212073845134, "learning_rate": 8.774589248832153e-06, "loss": 17.5949, "step": 13724 }, { "epoch": 0.2508819712285448, "grad_norm": 5.663663159463208, "learning_rate": 8.77439511211845e-06, "loss": 17.4429, "step": 13725 }, { "epoch": 0.2509002504249913, "grad_norm": 6.10287318775543, "learning_rate": 8.774200962175816e-06, "loss": 17.5407, "step": 13726 }, { "epoch": 0.25091852962143785, "grad_norm": 7.309553641820336, "learning_rate": 8.77400679900494e-06, "loss": 17.8893, "step": 13727 }, { "epoch": 0.2509368088178844, "grad_norm": 6.064058751608981, "learning_rate": 8.773812622606494e-06, "loss": 17.4146, "step": 13728 }, { "epoch": 0.25095508801433086, "grad_norm": 6.750710304996495, "learning_rate": 8.773618432981163e-06, "loss": 17.7342, "step": 13729 }, { "epoch": 0.2509733672107774, "grad_norm": 6.2763482507359925, "learning_rate": 8.773424230129628e-06, "loss": 17.5536, "step": 13730 }, { "epoch": 0.25099164640722393, "grad_norm": 7.598059918733686, "learning_rate": 8.773230014052568e-06, "loss": 18.1678, "step": 13731 }, { "epoch": 0.25100992560367047, "grad_norm": 7.145922873989569, "learning_rate": 8.773035784750663e-06, "loss": 17.4477, "step": 13732 }, { "epoch": 0.251028204800117, "grad_norm": 6.458154817569209, "learning_rate": 8.772841542224596e-06, "loss": 17.4468, "step": 13733 }, { "epoch": 0.2510464839965635, "grad_norm": 7.087951311196237, "learning_rate": 8.772647286475047e-06, "loss": 18.3194, "step": 13734 }, { "epoch": 0.25106476319301, "grad_norm": 6.436018704490948, "learning_rate": 8.772453017502695e-06, "loss": 17.663, "step": 13735 }, { "epoch": 0.25108304238945656, "grad_norm": 6.977212610563405, "learning_rate": 8.772258735308225e-06, "loss": 17.5539, "step": 13736 }, { "epoch": 0.2511013215859031, "grad_norm": 6.239105916239187, "learning_rate": 8.772064439892314e-06, "loss": 17.4486, "step": 13737 }, { "epoch": 0.25111960078234963, "grad_norm": 6.015031005242834, "learning_rate": 8.771870131255646e-06, "loss": 17.2309, "step": 13738 }, { "epoch": 0.2511378799787961, "grad_norm": 6.721088540210563, "learning_rate": 8.771675809398898e-06, "loss": 17.5095, "step": 13739 }, { "epoch": 0.25115615917524264, "grad_norm": 6.41520292709623, "learning_rate": 8.771481474322755e-06, "loss": 17.5005, "step": 13740 }, { "epoch": 0.2511744383716892, "grad_norm": 6.2391058121266, "learning_rate": 8.771287126027897e-06, "loss": 17.1922, "step": 13741 }, { "epoch": 0.2511927175681357, "grad_norm": 7.028197784691112, "learning_rate": 8.771092764515006e-06, "loss": 17.6695, "step": 13742 }, { "epoch": 0.25121099676458225, "grad_norm": 6.927224606206834, "learning_rate": 8.77089838978476e-06, "loss": 17.3227, "step": 13743 }, { "epoch": 0.25122927596102873, "grad_norm": 5.501662333540958, "learning_rate": 8.770704001837843e-06, "loss": 17.1795, "step": 13744 }, { "epoch": 0.25124755515747527, "grad_norm": 7.98795049015665, "learning_rate": 8.770509600674934e-06, "loss": 17.6757, "step": 13745 }, { "epoch": 0.2512658343539218, "grad_norm": 6.386750052454475, "learning_rate": 8.770315186296719e-06, "loss": 17.5961, "step": 13746 }, { "epoch": 0.25128411355036834, "grad_norm": 5.7846685922114816, "learning_rate": 8.770120758703874e-06, "loss": 17.1132, "step": 13747 }, { "epoch": 0.2513023927468149, "grad_norm": 7.50890297590246, "learning_rate": 8.769926317897084e-06, "loss": 18.0496, "step": 13748 }, { "epoch": 0.25132067194326135, "grad_norm": 7.499891461814617, "learning_rate": 8.76973186387703e-06, "loss": 18.0797, "step": 13749 }, { "epoch": 0.2513389511397079, "grad_norm": 6.708457406673459, "learning_rate": 8.769537396644393e-06, "loss": 17.261, "step": 13750 }, { "epoch": 0.2513572303361544, "grad_norm": 5.309378532147494, "learning_rate": 8.769342916199854e-06, "loss": 17.024, "step": 13751 }, { "epoch": 0.25137550953260096, "grad_norm": 6.084734857501226, "learning_rate": 8.769148422544095e-06, "loss": 17.3959, "step": 13752 }, { "epoch": 0.2513937887290475, "grad_norm": 8.956039493326504, "learning_rate": 8.768953915677798e-06, "loss": 18.5505, "step": 13753 }, { "epoch": 0.251412067925494, "grad_norm": 6.96444705521746, "learning_rate": 8.768759395601645e-06, "loss": 17.9796, "step": 13754 }, { "epoch": 0.2514303471219405, "grad_norm": 6.557172749327448, "learning_rate": 8.768564862316316e-06, "loss": 17.5487, "step": 13755 }, { "epoch": 0.25144862631838705, "grad_norm": 6.0961799240180135, "learning_rate": 8.768370315822496e-06, "loss": 17.4884, "step": 13756 }, { "epoch": 0.2514669055148336, "grad_norm": 5.801513897231396, "learning_rate": 8.768175756120864e-06, "loss": 17.4211, "step": 13757 }, { "epoch": 0.2514851847112801, "grad_norm": 7.995947668387565, "learning_rate": 8.767981183212103e-06, "loss": 18.1506, "step": 13758 }, { "epoch": 0.2515034639077266, "grad_norm": 6.520774360548156, "learning_rate": 8.767786597096895e-06, "loss": 17.4924, "step": 13759 }, { "epoch": 0.25152174310417313, "grad_norm": 5.587148555102545, "learning_rate": 8.767591997775922e-06, "loss": 16.8752, "step": 13760 }, { "epoch": 0.25154002230061967, "grad_norm": 6.552470227336554, "learning_rate": 8.767397385249865e-06, "loss": 17.7395, "step": 13761 }, { "epoch": 0.2515583014970662, "grad_norm": 8.321874718686061, "learning_rate": 8.767202759519409e-06, "loss": 17.7483, "step": 13762 }, { "epoch": 0.2515765806935127, "grad_norm": 6.215825841701845, "learning_rate": 8.767008120585233e-06, "loss": 17.5189, "step": 13763 }, { "epoch": 0.2515948598899592, "grad_norm": 6.550420398143107, "learning_rate": 8.76681346844802e-06, "loss": 17.7779, "step": 13764 }, { "epoch": 0.25161313908640576, "grad_norm": 6.570276372280605, "learning_rate": 8.766618803108454e-06, "loss": 17.6302, "step": 13765 }, { "epoch": 0.2516314182828523, "grad_norm": 6.98919670996425, "learning_rate": 8.766424124567215e-06, "loss": 17.8021, "step": 13766 }, { "epoch": 0.25164969747929883, "grad_norm": 7.385807087426103, "learning_rate": 8.766229432824986e-06, "loss": 18.305, "step": 13767 }, { "epoch": 0.2516679766757453, "grad_norm": 6.239690219179112, "learning_rate": 8.76603472788245e-06, "loss": 17.2774, "step": 13768 }, { "epoch": 0.25168625587219184, "grad_norm": 7.529944805267041, "learning_rate": 8.765840009740289e-06, "loss": 17.7005, "step": 13769 }, { "epoch": 0.2517045350686384, "grad_norm": 6.707427014138782, "learning_rate": 8.765645278399187e-06, "loss": 17.8625, "step": 13770 }, { "epoch": 0.2517228142650849, "grad_norm": 6.376799995019203, "learning_rate": 8.765450533859823e-06, "loss": 17.7751, "step": 13771 }, { "epoch": 0.25174109346153145, "grad_norm": 7.842346926988082, "learning_rate": 8.765255776122884e-06, "loss": 18.1796, "step": 13772 }, { "epoch": 0.25175937265797793, "grad_norm": 7.827714310710927, "learning_rate": 8.765061005189048e-06, "loss": 17.8566, "step": 13773 }, { "epoch": 0.25177765185442447, "grad_norm": 9.714632895819868, "learning_rate": 8.764866221059e-06, "loss": 18.6999, "step": 13774 }, { "epoch": 0.251795931050871, "grad_norm": 5.1415860911994615, "learning_rate": 8.764671423733424e-06, "loss": 17.1295, "step": 13775 }, { "epoch": 0.25181421024731754, "grad_norm": 5.465218869950854, "learning_rate": 8.764476613213e-06, "loss": 17.1065, "step": 13776 }, { "epoch": 0.2518324894437641, "grad_norm": 7.352397364400496, "learning_rate": 8.764281789498412e-06, "loss": 17.879, "step": 13777 }, { "epoch": 0.25185076864021055, "grad_norm": 5.749608396722773, "learning_rate": 8.764086952590345e-06, "loss": 17.3095, "step": 13778 }, { "epoch": 0.2518690478366571, "grad_norm": 5.911839463727114, "learning_rate": 8.763892102489478e-06, "loss": 17.1792, "step": 13779 }, { "epoch": 0.2518873270331036, "grad_norm": 6.544310704774565, "learning_rate": 8.763697239196496e-06, "loss": 17.6827, "step": 13780 }, { "epoch": 0.25190560622955016, "grad_norm": 7.4383699086318815, "learning_rate": 8.763502362712082e-06, "loss": 17.9255, "step": 13781 }, { "epoch": 0.2519238854259967, "grad_norm": 5.8094518104292385, "learning_rate": 8.763307473036919e-06, "loss": 17.2015, "step": 13782 }, { "epoch": 0.2519421646224432, "grad_norm": 4.97326767951542, "learning_rate": 8.76311257017169e-06, "loss": 16.8358, "step": 13783 }, { "epoch": 0.2519604438188897, "grad_norm": 6.373518140325814, "learning_rate": 8.762917654117077e-06, "loss": 17.5874, "step": 13784 }, { "epoch": 0.25197872301533625, "grad_norm": 6.702180106813105, "learning_rate": 8.762722724873766e-06, "loss": 17.7037, "step": 13785 }, { "epoch": 0.2519970022117828, "grad_norm": 5.64079520470863, "learning_rate": 8.762527782442436e-06, "loss": 17.2127, "step": 13786 }, { "epoch": 0.2520152814082293, "grad_norm": 5.75393418504257, "learning_rate": 8.762332826823774e-06, "loss": 17.3287, "step": 13787 }, { "epoch": 0.2520335606046758, "grad_norm": 8.09878369793486, "learning_rate": 8.762137858018463e-06, "loss": 18.1549, "step": 13788 }, { "epoch": 0.25205183980112233, "grad_norm": 6.6744313033977445, "learning_rate": 8.761942876027185e-06, "loss": 17.7911, "step": 13789 }, { "epoch": 0.25207011899756887, "grad_norm": 5.864234424938747, "learning_rate": 8.761747880850622e-06, "loss": 17.4366, "step": 13790 }, { "epoch": 0.2520883981940154, "grad_norm": 5.890294583815388, "learning_rate": 8.76155287248946e-06, "loss": 17.2478, "step": 13791 }, { "epoch": 0.25210667739046194, "grad_norm": 6.613126406826565, "learning_rate": 8.76135785094438e-06, "loss": 17.4939, "step": 13792 }, { "epoch": 0.2521249565869084, "grad_norm": 5.950499194438246, "learning_rate": 8.76116281621607e-06, "loss": 17.2808, "step": 13793 }, { "epoch": 0.25214323578335496, "grad_norm": 6.577486756184717, "learning_rate": 8.760967768305208e-06, "loss": 17.8002, "step": 13794 }, { "epoch": 0.2521615149798015, "grad_norm": 7.997368502367322, "learning_rate": 8.760772707212483e-06, "loss": 18.0038, "step": 13795 }, { "epoch": 0.25217979417624803, "grad_norm": 6.024074301066061, "learning_rate": 8.760577632938574e-06, "loss": 17.196, "step": 13796 }, { "epoch": 0.2521980733726945, "grad_norm": 6.751303858541155, "learning_rate": 8.760382545484167e-06, "loss": 17.7404, "step": 13797 }, { "epoch": 0.25221635256914104, "grad_norm": 6.5231244162970095, "learning_rate": 8.760187444849946e-06, "loss": 17.4269, "step": 13798 }, { "epoch": 0.2522346317655876, "grad_norm": 6.231742603340174, "learning_rate": 8.759992331036595e-06, "loss": 17.4389, "step": 13799 }, { "epoch": 0.2522529109620341, "grad_norm": 6.846407027722996, "learning_rate": 8.759797204044796e-06, "loss": 17.6918, "step": 13800 }, { "epoch": 0.25227119015848065, "grad_norm": 5.93279747930007, "learning_rate": 8.759602063875234e-06, "loss": 17.2997, "step": 13801 }, { "epoch": 0.25228946935492713, "grad_norm": 6.880431100924478, "learning_rate": 8.759406910528595e-06, "loss": 17.5852, "step": 13802 }, { "epoch": 0.25230774855137367, "grad_norm": 6.464710572953449, "learning_rate": 8.759211744005558e-06, "loss": 17.5171, "step": 13803 }, { "epoch": 0.2523260277478202, "grad_norm": 6.6056523743240145, "learning_rate": 8.759016564306813e-06, "loss": 17.8632, "step": 13804 }, { "epoch": 0.25234430694426674, "grad_norm": 7.711844446399845, "learning_rate": 8.758821371433038e-06, "loss": 18.2702, "step": 13805 }, { "epoch": 0.2523625861407133, "grad_norm": 8.361546716754969, "learning_rate": 8.758626165384922e-06, "loss": 17.6389, "step": 13806 }, { "epoch": 0.25238086533715975, "grad_norm": 5.348057047525714, "learning_rate": 8.758430946163147e-06, "loss": 17.2781, "step": 13807 }, { "epoch": 0.2523991445336063, "grad_norm": 5.907196701423856, "learning_rate": 8.7582357137684e-06, "loss": 17.2221, "step": 13808 }, { "epoch": 0.2524174237300528, "grad_norm": 5.698606558156475, "learning_rate": 8.75804046820136e-06, "loss": 17.2344, "step": 13809 }, { "epoch": 0.25243570292649936, "grad_norm": 7.619353413647588, "learning_rate": 8.757845209462714e-06, "loss": 18.3026, "step": 13810 }, { "epoch": 0.2524539821229459, "grad_norm": 6.521224886232347, "learning_rate": 8.757649937553149e-06, "loss": 17.608, "step": 13811 }, { "epoch": 0.2524722613193924, "grad_norm": 7.291541060939861, "learning_rate": 8.757454652473345e-06, "loss": 17.6405, "step": 13812 }, { "epoch": 0.2524905405158389, "grad_norm": 10.104211927869024, "learning_rate": 8.75725935422399e-06, "loss": 17.9236, "step": 13813 }, { "epoch": 0.25250881971228545, "grad_norm": 5.76059508199635, "learning_rate": 8.757064042805767e-06, "loss": 17.3052, "step": 13814 }, { "epoch": 0.252527098908732, "grad_norm": 6.423065067097856, "learning_rate": 8.75686871821936e-06, "loss": 17.5747, "step": 13815 }, { "epoch": 0.2525453781051785, "grad_norm": 6.507071909941204, "learning_rate": 8.756673380465453e-06, "loss": 17.6979, "step": 13816 }, { "epoch": 0.252563657301625, "grad_norm": 5.826902914321585, "learning_rate": 8.756478029544733e-06, "loss": 17.3318, "step": 13817 }, { "epoch": 0.25258193649807154, "grad_norm": 5.9334082046521965, "learning_rate": 8.756282665457884e-06, "loss": 17.2388, "step": 13818 }, { "epoch": 0.25260021569451807, "grad_norm": 6.744000989321716, "learning_rate": 8.756087288205588e-06, "loss": 17.7638, "step": 13819 }, { "epoch": 0.2526184948909646, "grad_norm": 5.564590959550204, "learning_rate": 8.755891897788534e-06, "loss": 17.0945, "step": 13820 }, { "epoch": 0.25263677408741114, "grad_norm": 5.291913492772538, "learning_rate": 8.755696494207405e-06, "loss": 17.153, "step": 13821 }, { "epoch": 0.2526550532838576, "grad_norm": 6.384061545444824, "learning_rate": 8.755501077462885e-06, "loss": 17.267, "step": 13822 }, { "epoch": 0.25267333248030416, "grad_norm": 6.841622797234484, "learning_rate": 8.75530564755566e-06, "loss": 17.4829, "step": 13823 }, { "epoch": 0.2526916116767507, "grad_norm": 6.412165616148324, "learning_rate": 8.755110204486414e-06, "loss": 17.7288, "step": 13824 }, { "epoch": 0.25270989087319723, "grad_norm": 8.189640179660579, "learning_rate": 8.754914748255832e-06, "loss": 17.7499, "step": 13825 }, { "epoch": 0.25272817006964376, "grad_norm": 6.376840522198051, "learning_rate": 8.754719278864601e-06, "loss": 17.3897, "step": 13826 }, { "epoch": 0.25274644926609025, "grad_norm": 5.0816212245536345, "learning_rate": 8.754523796313404e-06, "loss": 16.8883, "step": 13827 }, { "epoch": 0.2527647284625368, "grad_norm": 10.482001907576482, "learning_rate": 8.754328300602928e-06, "loss": 18.3032, "step": 13828 }, { "epoch": 0.2527830076589833, "grad_norm": 6.792427911599967, "learning_rate": 8.754132791733856e-06, "loss": 17.7622, "step": 13829 }, { "epoch": 0.25280128685542985, "grad_norm": 6.72865970828854, "learning_rate": 8.753937269706873e-06, "loss": 17.924, "step": 13830 }, { "epoch": 0.25281956605187633, "grad_norm": 6.264580177397738, "learning_rate": 8.753741734522668e-06, "loss": 17.551, "step": 13831 }, { "epoch": 0.25283784524832287, "grad_norm": 7.223483013362514, "learning_rate": 8.753546186181924e-06, "loss": 17.7338, "step": 13832 }, { "epoch": 0.2528561244447694, "grad_norm": 5.476514762019108, "learning_rate": 8.753350624685325e-06, "loss": 17.0362, "step": 13833 }, { "epoch": 0.25287440364121594, "grad_norm": 6.736717364355989, "learning_rate": 8.753155050033558e-06, "loss": 17.6898, "step": 13834 }, { "epoch": 0.2528926828376625, "grad_norm": 6.826553700256823, "learning_rate": 8.752959462227308e-06, "loss": 17.6636, "step": 13835 }, { "epoch": 0.25291096203410895, "grad_norm": 6.110653589759968, "learning_rate": 8.752763861267262e-06, "loss": 17.4454, "step": 13836 }, { "epoch": 0.2529292412305555, "grad_norm": 8.146771509318832, "learning_rate": 8.752568247154103e-06, "loss": 18.3319, "step": 13837 }, { "epoch": 0.252947520427002, "grad_norm": 6.042919896918304, "learning_rate": 8.752372619888519e-06, "loss": 17.3727, "step": 13838 }, { "epoch": 0.25296579962344856, "grad_norm": 5.608386316499777, "learning_rate": 8.752176979471194e-06, "loss": 17.0876, "step": 13839 }, { "epoch": 0.2529840788198951, "grad_norm": 6.371974087607878, "learning_rate": 8.751981325902814e-06, "loss": 17.6724, "step": 13840 }, { "epoch": 0.2530023580163416, "grad_norm": 7.217163091063996, "learning_rate": 8.751785659184066e-06, "loss": 17.7458, "step": 13841 }, { "epoch": 0.2530206372127881, "grad_norm": 8.851814579077875, "learning_rate": 8.751589979315634e-06, "loss": 18.2688, "step": 13842 }, { "epoch": 0.25303891640923465, "grad_norm": 7.482599558175977, "learning_rate": 8.751394286298204e-06, "loss": 17.707, "step": 13843 }, { "epoch": 0.2530571956056812, "grad_norm": 6.040627743486871, "learning_rate": 8.751198580132464e-06, "loss": 17.5163, "step": 13844 }, { "epoch": 0.2530754748021277, "grad_norm": 8.717437003205008, "learning_rate": 8.751002860819098e-06, "loss": 18.3448, "step": 13845 }, { "epoch": 0.2530937539985742, "grad_norm": 5.543502707869062, "learning_rate": 8.750807128358792e-06, "loss": 17.028, "step": 13846 }, { "epoch": 0.25311203319502074, "grad_norm": 5.832826377516405, "learning_rate": 8.750611382752233e-06, "loss": 17.4799, "step": 13847 }, { "epoch": 0.25313031239146727, "grad_norm": 6.421644335761602, "learning_rate": 8.750415624000105e-06, "loss": 17.5648, "step": 13848 }, { "epoch": 0.2531485915879138, "grad_norm": 7.0638634413962205, "learning_rate": 8.750219852103098e-06, "loss": 17.6503, "step": 13849 }, { "epoch": 0.25316687078436034, "grad_norm": 6.496441590330903, "learning_rate": 8.750024067061895e-06, "loss": 17.3566, "step": 13850 }, { "epoch": 0.2531851499808068, "grad_norm": 7.657369861136381, "learning_rate": 8.749828268877182e-06, "loss": 18.2053, "step": 13851 }, { "epoch": 0.25320342917725336, "grad_norm": 5.9427225811770645, "learning_rate": 8.74963245754965e-06, "loss": 17.3519, "step": 13852 }, { "epoch": 0.2532217083736999, "grad_norm": 8.107318124705973, "learning_rate": 8.749436633079977e-06, "loss": 17.6699, "step": 13853 }, { "epoch": 0.25323998757014643, "grad_norm": 6.6438036051962825, "learning_rate": 8.749240795468856e-06, "loss": 17.5672, "step": 13854 }, { "epoch": 0.25325826676659297, "grad_norm": 6.85004338489899, "learning_rate": 8.749044944716972e-06, "loss": 17.7759, "step": 13855 }, { "epoch": 0.25327654596303945, "grad_norm": 5.702389014906411, "learning_rate": 8.748849080825011e-06, "loss": 17.2129, "step": 13856 }, { "epoch": 0.253294825159486, "grad_norm": 5.6610331545114, "learning_rate": 8.748653203793658e-06, "loss": 17.0836, "step": 13857 }, { "epoch": 0.2533131043559325, "grad_norm": 7.884955625477877, "learning_rate": 8.7484573136236e-06, "loss": 18.0809, "step": 13858 }, { "epoch": 0.25333138355237905, "grad_norm": 6.298036069861454, "learning_rate": 8.748261410315527e-06, "loss": 17.5352, "step": 13859 }, { "epoch": 0.2533496627488256, "grad_norm": 5.899677885519186, "learning_rate": 8.748065493870122e-06, "loss": 17.3049, "step": 13860 }, { "epoch": 0.25336794194527207, "grad_norm": 7.452406224920589, "learning_rate": 8.747869564288072e-06, "loss": 18.1885, "step": 13861 }, { "epoch": 0.2533862211417186, "grad_norm": 6.616031007667161, "learning_rate": 8.747673621570063e-06, "loss": 17.498, "step": 13862 }, { "epoch": 0.25340450033816514, "grad_norm": 6.190275743116507, "learning_rate": 8.747477665716786e-06, "loss": 17.239, "step": 13863 }, { "epoch": 0.2534227795346117, "grad_norm": 6.69834684601848, "learning_rate": 8.747281696728922e-06, "loss": 17.6468, "step": 13864 }, { "epoch": 0.25344105873105816, "grad_norm": 6.465176767968881, "learning_rate": 8.747085714607164e-06, "loss": 17.3516, "step": 13865 }, { "epoch": 0.2534593379275047, "grad_norm": 7.138906865721986, "learning_rate": 8.746889719352194e-06, "loss": 18.082, "step": 13866 }, { "epoch": 0.2534776171239512, "grad_norm": 7.035732221360487, "learning_rate": 8.746693710964702e-06, "loss": 17.628, "step": 13867 }, { "epoch": 0.25349589632039776, "grad_norm": 6.487640928438261, "learning_rate": 8.746497689445373e-06, "loss": 17.6296, "step": 13868 }, { "epoch": 0.2535141755168443, "grad_norm": 6.642219085040461, "learning_rate": 8.746301654794894e-06, "loss": 17.533, "step": 13869 }, { "epoch": 0.2535324547132908, "grad_norm": 6.657256827746655, "learning_rate": 8.746105607013952e-06, "loss": 17.4731, "step": 13870 }, { "epoch": 0.2535507339097373, "grad_norm": 8.098118528783477, "learning_rate": 8.745909546103237e-06, "loss": 17.325, "step": 13871 }, { "epoch": 0.25356901310618385, "grad_norm": 6.5370206159418425, "learning_rate": 8.745713472063432e-06, "loss": 17.4677, "step": 13872 }, { "epoch": 0.2535872923026304, "grad_norm": 5.555840875869279, "learning_rate": 8.745517384895228e-06, "loss": 17.1296, "step": 13873 }, { "epoch": 0.2536055714990769, "grad_norm": 5.61644510958449, "learning_rate": 8.745321284599311e-06, "loss": 17.238, "step": 13874 }, { "epoch": 0.2536238506955234, "grad_norm": 7.451073328635847, "learning_rate": 8.745125171176367e-06, "loss": 17.946, "step": 13875 }, { "epoch": 0.25364212989196994, "grad_norm": 6.068830688793433, "learning_rate": 8.744929044627084e-06, "loss": 17.3497, "step": 13876 }, { "epoch": 0.25366040908841647, "grad_norm": 6.33350173519903, "learning_rate": 8.74473290495215e-06, "loss": 17.3324, "step": 13877 }, { "epoch": 0.253678688284863, "grad_norm": 6.278681972251112, "learning_rate": 8.744536752152251e-06, "loss": 17.4805, "step": 13878 }, { "epoch": 0.25369696748130954, "grad_norm": 6.739404763073544, "learning_rate": 8.744340586228077e-06, "loss": 17.6275, "step": 13879 }, { "epoch": 0.253715246677756, "grad_norm": 5.301174165885163, "learning_rate": 8.744144407180315e-06, "loss": 16.9004, "step": 13880 }, { "epoch": 0.25373352587420256, "grad_norm": 5.948201083202224, "learning_rate": 8.74394821500965e-06, "loss": 17.1906, "step": 13881 }, { "epoch": 0.2537518050706491, "grad_norm": 6.435881926797897, "learning_rate": 8.743752009716772e-06, "loss": 17.597, "step": 13882 }, { "epoch": 0.25377008426709563, "grad_norm": 5.458443195542765, "learning_rate": 8.743555791302368e-06, "loss": 17.2105, "step": 13883 }, { "epoch": 0.25378836346354217, "grad_norm": 6.027649550947092, "learning_rate": 8.743359559767127e-06, "loss": 17.2979, "step": 13884 }, { "epoch": 0.25380664265998865, "grad_norm": 6.813964020203235, "learning_rate": 8.743163315111733e-06, "loss": 17.7843, "step": 13885 }, { "epoch": 0.2538249218564352, "grad_norm": 7.348223519766389, "learning_rate": 8.742967057336877e-06, "loss": 17.8382, "step": 13886 }, { "epoch": 0.2538432010528817, "grad_norm": 5.694232252564015, "learning_rate": 8.742770786443249e-06, "loss": 17.0623, "step": 13887 }, { "epoch": 0.25386148024932825, "grad_norm": 6.130295450701716, "learning_rate": 8.742574502431532e-06, "loss": 17.6467, "step": 13888 }, { "epoch": 0.2538797594457748, "grad_norm": 7.682631583749733, "learning_rate": 8.742378205302415e-06, "loss": 17.8717, "step": 13889 }, { "epoch": 0.25389803864222127, "grad_norm": 9.483293760520688, "learning_rate": 8.74218189505659e-06, "loss": 18.3314, "step": 13890 }, { "epoch": 0.2539163178386678, "grad_norm": 6.698039296153758, "learning_rate": 8.74198557169474e-06, "loss": 17.7914, "step": 13891 }, { "epoch": 0.25393459703511434, "grad_norm": 6.219013252527224, "learning_rate": 8.741789235217558e-06, "loss": 17.3265, "step": 13892 }, { "epoch": 0.2539528762315609, "grad_norm": 7.93899289527623, "learning_rate": 8.741592885625724e-06, "loss": 18.2619, "step": 13893 }, { "epoch": 0.2539711554280074, "grad_norm": 7.431414942399366, "learning_rate": 8.741396522919937e-06, "loss": 17.8052, "step": 13894 }, { "epoch": 0.2539894346244539, "grad_norm": 6.055708376786302, "learning_rate": 8.741200147100877e-06, "loss": 17.224, "step": 13895 }, { "epoch": 0.2540077138209004, "grad_norm": 7.543950459697201, "learning_rate": 8.741003758169236e-06, "loss": 17.7052, "step": 13896 }, { "epoch": 0.25402599301734696, "grad_norm": 6.290080852767708, "learning_rate": 8.740807356125702e-06, "loss": 17.3903, "step": 13897 }, { "epoch": 0.2540442722137935, "grad_norm": 6.140791835594011, "learning_rate": 8.740610940970962e-06, "loss": 17.4723, "step": 13898 }, { "epoch": 0.25406255141024, "grad_norm": 5.786914095509982, "learning_rate": 8.740414512705706e-06, "loss": 17.2791, "step": 13899 }, { "epoch": 0.2540808306066865, "grad_norm": 6.452620799877661, "learning_rate": 8.740218071330622e-06, "loss": 17.7083, "step": 13900 }, { "epoch": 0.25409910980313305, "grad_norm": 7.575533529925206, "learning_rate": 8.740021616846397e-06, "loss": 18.1212, "step": 13901 }, { "epoch": 0.2541173889995796, "grad_norm": 7.746599654235914, "learning_rate": 8.739825149253721e-06, "loss": 18.0708, "step": 13902 }, { "epoch": 0.2541356681960261, "grad_norm": 6.4143169159359, "learning_rate": 8.739628668553283e-06, "loss": 17.7577, "step": 13903 }, { "epoch": 0.2541539473924726, "grad_norm": 7.181830896596694, "learning_rate": 8.73943217474577e-06, "loss": 18.2878, "step": 13904 }, { "epoch": 0.25417222658891914, "grad_norm": 6.212225736628983, "learning_rate": 8.739235667831874e-06, "loss": 17.5087, "step": 13905 }, { "epoch": 0.2541905057853657, "grad_norm": 7.833271790484041, "learning_rate": 8.739039147812278e-06, "loss": 18.1298, "step": 13906 }, { "epoch": 0.2542087849818122, "grad_norm": 7.227486585626569, "learning_rate": 8.738842614687676e-06, "loss": 17.6637, "step": 13907 }, { "epoch": 0.25422706417825874, "grad_norm": 7.374676572810546, "learning_rate": 8.738646068458757e-06, "loss": 17.8617, "step": 13908 }, { "epoch": 0.2542453433747052, "grad_norm": 6.5000955280762245, "learning_rate": 8.738449509126205e-06, "loss": 17.7477, "step": 13909 }, { "epoch": 0.25426362257115176, "grad_norm": 6.593971964695243, "learning_rate": 8.738252936690713e-06, "loss": 17.4493, "step": 13910 }, { "epoch": 0.2542819017675983, "grad_norm": 7.818910279079011, "learning_rate": 8.73805635115297e-06, "loss": 18.1238, "step": 13911 }, { "epoch": 0.25430018096404483, "grad_norm": 7.10818976180064, "learning_rate": 8.737859752513661e-06, "loss": 17.6805, "step": 13912 }, { "epoch": 0.25431846016049137, "grad_norm": 6.605018965616754, "learning_rate": 8.73766314077348e-06, "loss": 17.3774, "step": 13913 }, { "epoch": 0.25433673935693785, "grad_norm": 6.737453418711159, "learning_rate": 8.737466515933116e-06, "loss": 17.7287, "step": 13914 }, { "epoch": 0.2543550185533844, "grad_norm": 6.726761825052466, "learning_rate": 8.737269877993254e-06, "loss": 17.3611, "step": 13915 }, { "epoch": 0.2543732977498309, "grad_norm": 6.686468349938027, "learning_rate": 8.737073226954585e-06, "loss": 17.5626, "step": 13916 }, { "epoch": 0.25439157694627745, "grad_norm": 9.721580061837525, "learning_rate": 8.736876562817798e-06, "loss": 18.8001, "step": 13917 }, { "epoch": 0.254409856142724, "grad_norm": 7.320850100564549, "learning_rate": 8.736679885583583e-06, "loss": 18.041, "step": 13918 }, { "epoch": 0.25442813533917047, "grad_norm": 7.4543355840587475, "learning_rate": 8.73648319525263e-06, "loss": 17.9807, "step": 13919 }, { "epoch": 0.254446414535617, "grad_norm": 7.569963979882603, "learning_rate": 8.736286491825627e-06, "loss": 17.8133, "step": 13920 }, { "epoch": 0.25446469373206354, "grad_norm": 6.071590080065675, "learning_rate": 8.736089775303266e-06, "loss": 17.4813, "step": 13921 }, { "epoch": 0.2544829729285101, "grad_norm": 6.161994516871227, "learning_rate": 8.735893045686233e-06, "loss": 17.3722, "step": 13922 }, { "epoch": 0.2545012521249566, "grad_norm": 7.862371069244069, "learning_rate": 8.735696302975219e-06, "loss": 18.0639, "step": 13923 }, { "epoch": 0.2545195313214031, "grad_norm": 6.276884236657523, "learning_rate": 8.735499547170914e-06, "loss": 17.3353, "step": 13924 }, { "epoch": 0.2545378105178496, "grad_norm": 5.967898857485332, "learning_rate": 8.735302778274009e-06, "loss": 17.3818, "step": 13925 }, { "epoch": 0.25455608971429616, "grad_norm": 7.193800609691263, "learning_rate": 8.73510599628519e-06, "loss": 18.0089, "step": 13926 }, { "epoch": 0.2545743689107427, "grad_norm": 6.54437382860853, "learning_rate": 8.734909201205148e-06, "loss": 17.6759, "step": 13927 }, { "epoch": 0.25459264810718923, "grad_norm": 5.981309927884284, "learning_rate": 8.734712393034574e-06, "loss": 17.073, "step": 13928 }, { "epoch": 0.2546109273036357, "grad_norm": 6.252306285214709, "learning_rate": 8.734515571774157e-06, "loss": 17.2187, "step": 13929 }, { "epoch": 0.25462920650008225, "grad_norm": 6.1530465363391915, "learning_rate": 8.734318737424588e-06, "loss": 17.4144, "step": 13930 }, { "epoch": 0.2546474856965288, "grad_norm": 7.53964874121549, "learning_rate": 8.734121889986555e-06, "loss": 17.6794, "step": 13931 }, { "epoch": 0.2546657648929753, "grad_norm": 6.32611595168549, "learning_rate": 8.733925029460747e-06, "loss": 17.4249, "step": 13932 }, { "epoch": 0.2546840440894218, "grad_norm": 6.687145282129531, "learning_rate": 8.733728155847858e-06, "loss": 17.9875, "step": 13933 }, { "epoch": 0.25470232328586834, "grad_norm": 6.882934764697469, "learning_rate": 8.733531269148576e-06, "loss": 17.7335, "step": 13934 }, { "epoch": 0.2547206024823149, "grad_norm": 7.678694872833678, "learning_rate": 8.73333436936359e-06, "loss": 18.0639, "step": 13935 }, { "epoch": 0.2547388816787614, "grad_norm": 6.626212475299726, "learning_rate": 8.733137456493593e-06, "loss": 17.6482, "step": 13936 }, { "epoch": 0.25475716087520794, "grad_norm": 7.1339521180408445, "learning_rate": 8.732940530539271e-06, "loss": 17.8985, "step": 13937 }, { "epoch": 0.2547754400716544, "grad_norm": 6.607697252050391, "learning_rate": 8.732743591501316e-06, "loss": 17.5549, "step": 13938 }, { "epoch": 0.25479371926810096, "grad_norm": 6.155478463975609, "learning_rate": 8.732546639380419e-06, "loss": 17.4897, "step": 13939 }, { "epoch": 0.2548119984645475, "grad_norm": 7.410949350228957, "learning_rate": 8.732349674177272e-06, "loss": 17.678, "step": 13940 }, { "epoch": 0.25483027766099403, "grad_norm": 5.955900673297592, "learning_rate": 8.732152695892562e-06, "loss": 17.3842, "step": 13941 }, { "epoch": 0.25484855685744057, "grad_norm": 7.429077063390365, "learning_rate": 8.73195570452698e-06, "loss": 17.7786, "step": 13942 }, { "epoch": 0.25486683605388705, "grad_norm": 6.964177608064067, "learning_rate": 8.731758700081217e-06, "loss": 17.8819, "step": 13943 }, { "epoch": 0.2548851152503336, "grad_norm": 7.494081046872606, "learning_rate": 8.731561682555965e-06, "loss": 18.024, "step": 13944 }, { "epoch": 0.2549033944467801, "grad_norm": 7.457489692667915, "learning_rate": 8.73136465195191e-06, "loss": 17.6645, "step": 13945 }, { "epoch": 0.25492167364322665, "grad_norm": 7.575199443544168, "learning_rate": 8.73116760826975e-06, "loss": 17.6916, "step": 13946 }, { "epoch": 0.2549399528396732, "grad_norm": 6.211321489064392, "learning_rate": 8.73097055151017e-06, "loss": 17.3002, "step": 13947 }, { "epoch": 0.25495823203611967, "grad_norm": 6.231393405979343, "learning_rate": 8.73077348167386e-06, "loss": 17.3247, "step": 13948 }, { "epoch": 0.2549765112325662, "grad_norm": 7.8589940161868475, "learning_rate": 8.730576398761514e-06, "loss": 17.8705, "step": 13949 }, { "epoch": 0.25499479042901274, "grad_norm": 6.092213046044782, "learning_rate": 8.730379302773822e-06, "loss": 17.2994, "step": 13950 }, { "epoch": 0.2550130696254593, "grad_norm": 6.144376781693334, "learning_rate": 8.730182193711472e-06, "loss": 17.2841, "step": 13951 }, { "epoch": 0.2550313488219058, "grad_norm": 6.309420077332737, "learning_rate": 8.729985071575158e-06, "loss": 17.5078, "step": 13952 }, { "epoch": 0.2550496280183523, "grad_norm": 6.982088765553154, "learning_rate": 8.729787936365572e-06, "loss": 17.7818, "step": 13953 }, { "epoch": 0.25506790721479883, "grad_norm": 7.710406989976027, "learning_rate": 8.729590788083403e-06, "loss": 17.8721, "step": 13954 }, { "epoch": 0.25508618641124536, "grad_norm": 6.559110564146433, "learning_rate": 8.72939362672934e-06, "loss": 17.9672, "step": 13955 }, { "epoch": 0.2551044656076919, "grad_norm": 6.982277674262234, "learning_rate": 8.729196452304076e-06, "loss": 17.608, "step": 13956 }, { "epoch": 0.25512274480413843, "grad_norm": 7.703209985365643, "learning_rate": 8.728999264808303e-06, "loss": 18.0226, "step": 13957 }, { "epoch": 0.2551410240005849, "grad_norm": 7.204138122688372, "learning_rate": 8.72880206424271e-06, "loss": 17.6252, "step": 13958 }, { "epoch": 0.25515930319703145, "grad_norm": 7.232827552461167, "learning_rate": 8.72860485060799e-06, "loss": 17.8974, "step": 13959 }, { "epoch": 0.255177582393478, "grad_norm": 6.758013803673432, "learning_rate": 8.728407623904833e-06, "loss": 17.7697, "step": 13960 }, { "epoch": 0.2551958615899245, "grad_norm": 7.997805217356011, "learning_rate": 8.728210384133932e-06, "loss": 18.4118, "step": 13961 }, { "epoch": 0.25521414078637106, "grad_norm": 6.200605591400633, "learning_rate": 8.728013131295976e-06, "loss": 17.4238, "step": 13962 }, { "epoch": 0.25523241998281754, "grad_norm": 6.175082033002962, "learning_rate": 8.727815865391657e-06, "loss": 17.2796, "step": 13963 }, { "epoch": 0.2552506991792641, "grad_norm": 6.141657054367813, "learning_rate": 8.727618586421669e-06, "loss": 17.1876, "step": 13964 }, { "epoch": 0.2552689783757106, "grad_norm": 6.099863422243416, "learning_rate": 8.7274212943867e-06, "loss": 17.4914, "step": 13965 }, { "epoch": 0.25528725757215714, "grad_norm": 5.8144270085021486, "learning_rate": 8.727223989287443e-06, "loss": 17.0464, "step": 13966 }, { "epoch": 0.2553055367686036, "grad_norm": 8.306539054197764, "learning_rate": 8.72702667112459e-06, "loss": 18.2899, "step": 13967 }, { "epoch": 0.25532381596505016, "grad_norm": 5.482667624083453, "learning_rate": 8.72682933989883e-06, "loss": 17.1317, "step": 13968 }, { "epoch": 0.2553420951614967, "grad_norm": 5.693117008850541, "learning_rate": 8.72663199561086e-06, "loss": 17.2629, "step": 13969 }, { "epoch": 0.25536037435794323, "grad_norm": 6.306026003840627, "learning_rate": 8.726434638261365e-06, "loss": 17.4804, "step": 13970 }, { "epoch": 0.25537865355438977, "grad_norm": 6.4212860185533716, "learning_rate": 8.726237267851041e-06, "loss": 17.4157, "step": 13971 }, { "epoch": 0.25539693275083625, "grad_norm": 6.958322437037328, "learning_rate": 8.726039884380579e-06, "loss": 17.7611, "step": 13972 }, { "epoch": 0.2554152119472828, "grad_norm": 5.555300668995391, "learning_rate": 8.72584248785067e-06, "loss": 17.1776, "step": 13973 }, { "epoch": 0.2554334911437293, "grad_norm": 6.687111880084589, "learning_rate": 8.725645078262007e-06, "loss": 17.6282, "step": 13974 }, { "epoch": 0.25545177034017585, "grad_norm": 6.638536443221294, "learning_rate": 8.72544765561528e-06, "loss": 17.5349, "step": 13975 }, { "epoch": 0.2554700495366224, "grad_norm": 5.914273252875999, "learning_rate": 8.725250219911184e-06, "loss": 17.1228, "step": 13976 }, { "epoch": 0.25548832873306887, "grad_norm": 6.425812346335787, "learning_rate": 8.725052771150409e-06, "loss": 17.4639, "step": 13977 }, { "epoch": 0.2555066079295154, "grad_norm": 5.8223807022443985, "learning_rate": 8.724855309333646e-06, "loss": 16.9239, "step": 13978 }, { "epoch": 0.25552488712596194, "grad_norm": 6.484578700575855, "learning_rate": 8.72465783446159e-06, "loss": 17.4878, "step": 13979 }, { "epoch": 0.2555431663224085, "grad_norm": 6.083479783688803, "learning_rate": 8.72446034653493e-06, "loss": 17.4781, "step": 13980 }, { "epoch": 0.255561445518855, "grad_norm": 8.393779815892302, "learning_rate": 8.72426284555436e-06, "loss": 18.4201, "step": 13981 }, { "epoch": 0.2555797247153015, "grad_norm": 7.917411502202908, "learning_rate": 8.724065331520572e-06, "loss": 18.1957, "step": 13982 }, { "epoch": 0.25559800391174803, "grad_norm": 6.664139696039288, "learning_rate": 8.723867804434259e-06, "loss": 17.3455, "step": 13983 }, { "epoch": 0.25561628310819456, "grad_norm": 6.308596894214555, "learning_rate": 8.723670264296111e-06, "loss": 17.3711, "step": 13984 }, { "epoch": 0.2556345623046411, "grad_norm": 8.337303492069122, "learning_rate": 8.723472711106825e-06, "loss": 18.7995, "step": 13985 }, { "epoch": 0.25565284150108764, "grad_norm": 7.278473652304813, "learning_rate": 8.723275144867086e-06, "loss": 18.0212, "step": 13986 }, { "epoch": 0.2556711206975341, "grad_norm": 5.6860856800645445, "learning_rate": 8.723077565577594e-06, "loss": 17.3226, "step": 13987 }, { "epoch": 0.25568939989398065, "grad_norm": 6.1040925281351885, "learning_rate": 8.722879973239035e-06, "loss": 17.3142, "step": 13988 }, { "epoch": 0.2557076790904272, "grad_norm": 7.849493113221841, "learning_rate": 8.722682367852107e-06, "loss": 17.8536, "step": 13989 }, { "epoch": 0.2557259582868737, "grad_norm": 7.419112982774977, "learning_rate": 8.722484749417502e-06, "loss": 18.091, "step": 13990 }, { "epoch": 0.25574423748332026, "grad_norm": 6.844159293020602, "learning_rate": 8.722287117935908e-06, "loss": 17.6159, "step": 13991 }, { "epoch": 0.25576251667976674, "grad_norm": 11.027302576659178, "learning_rate": 8.722089473408023e-06, "loss": 18.0748, "step": 13992 }, { "epoch": 0.2557807958762133, "grad_norm": 6.231287286717342, "learning_rate": 8.721891815834534e-06, "loss": 17.5187, "step": 13993 }, { "epoch": 0.2557990750726598, "grad_norm": 5.926319474170429, "learning_rate": 8.72169414521614e-06, "loss": 17.2324, "step": 13994 }, { "epoch": 0.25581735426910635, "grad_norm": 9.830976195430448, "learning_rate": 8.721496461553528e-06, "loss": 18.9214, "step": 13995 }, { "epoch": 0.2558356334655529, "grad_norm": 5.332807377255744, "learning_rate": 8.721298764847397e-06, "loss": 16.7563, "step": 13996 }, { "epoch": 0.25585391266199936, "grad_norm": 5.556155595301225, "learning_rate": 8.721101055098436e-06, "loss": 17.1854, "step": 13997 }, { "epoch": 0.2558721918584459, "grad_norm": 6.470278581951466, "learning_rate": 8.720903332307339e-06, "loss": 17.4058, "step": 13998 }, { "epoch": 0.25589047105489243, "grad_norm": 5.6971359483934965, "learning_rate": 8.720705596474797e-06, "loss": 17.1146, "step": 13999 }, { "epoch": 0.25590875025133897, "grad_norm": 7.508495639754144, "learning_rate": 8.720507847601508e-06, "loss": 17.8727, "step": 14000 }, { "epoch": 0.25592702944778545, "grad_norm": 6.073201872739274, "learning_rate": 8.720310085688158e-06, "loss": 17.1639, "step": 14001 }, { "epoch": 0.255945308644232, "grad_norm": 6.219751981885219, "learning_rate": 8.720112310735445e-06, "loss": 17.5806, "step": 14002 }, { "epoch": 0.2559635878406785, "grad_norm": 6.3722758522487135, "learning_rate": 8.719914522744063e-06, "loss": 17.4845, "step": 14003 }, { "epoch": 0.25598186703712505, "grad_norm": 6.430087771797434, "learning_rate": 8.719716721714702e-06, "loss": 17.2951, "step": 14004 }, { "epoch": 0.2560001462335716, "grad_norm": 5.954364206203981, "learning_rate": 8.719518907648057e-06, "loss": 17.0703, "step": 14005 }, { "epoch": 0.25601842543001807, "grad_norm": 7.3816256220639405, "learning_rate": 8.71932108054482e-06, "loss": 17.8422, "step": 14006 }, { "epoch": 0.2560367046264646, "grad_norm": 5.585556965955438, "learning_rate": 8.719123240405686e-06, "loss": 17.3696, "step": 14007 }, { "epoch": 0.25605498382291114, "grad_norm": 6.562984981021811, "learning_rate": 8.71892538723135e-06, "loss": 17.5934, "step": 14008 }, { "epoch": 0.2560732630193577, "grad_norm": 5.994157750088668, "learning_rate": 8.7187275210225e-06, "loss": 17.5556, "step": 14009 }, { "epoch": 0.2560915422158042, "grad_norm": 7.149441666470342, "learning_rate": 8.718529641779834e-06, "loss": 17.8358, "step": 14010 }, { "epoch": 0.2561098214122507, "grad_norm": 10.585428630174878, "learning_rate": 8.718331749504045e-06, "loss": 18.3699, "step": 14011 }, { "epoch": 0.25612810060869723, "grad_norm": 5.729931093024647, "learning_rate": 8.718133844195825e-06, "loss": 17.2859, "step": 14012 }, { "epoch": 0.25614637980514376, "grad_norm": 8.041743853428542, "learning_rate": 8.717935925855869e-06, "loss": 17.9278, "step": 14013 }, { "epoch": 0.2561646590015903, "grad_norm": 6.644423763100476, "learning_rate": 8.717737994484869e-06, "loss": 17.8461, "step": 14014 }, { "epoch": 0.25618293819803684, "grad_norm": 7.099340966536888, "learning_rate": 8.717540050083522e-06, "loss": 18.0045, "step": 14015 }, { "epoch": 0.2562012173944833, "grad_norm": 6.345347347165285, "learning_rate": 8.717342092652518e-06, "loss": 17.5001, "step": 14016 }, { "epoch": 0.25621949659092985, "grad_norm": 6.670132674655566, "learning_rate": 8.717144122192553e-06, "loss": 17.6985, "step": 14017 }, { "epoch": 0.2562377757873764, "grad_norm": 7.582148281771751, "learning_rate": 8.71694613870432e-06, "loss": 18.2426, "step": 14018 }, { "epoch": 0.2562560549838229, "grad_norm": 7.761620215830936, "learning_rate": 8.716748142188514e-06, "loss": 17.9051, "step": 14019 }, { "epoch": 0.25627433418026946, "grad_norm": 6.94165788412036, "learning_rate": 8.71655013264583e-06, "loss": 17.7852, "step": 14020 }, { "epoch": 0.25629261337671594, "grad_norm": 6.112493460876916, "learning_rate": 8.716352110076958e-06, "loss": 17.0819, "step": 14021 }, { "epoch": 0.2563108925731625, "grad_norm": 6.820209225329127, "learning_rate": 8.716154074482594e-06, "loss": 17.7083, "step": 14022 }, { "epoch": 0.256329171769609, "grad_norm": 6.890240706035527, "learning_rate": 8.715956025863433e-06, "loss": 17.4598, "step": 14023 }, { "epoch": 0.25634745096605555, "grad_norm": 5.865000848072777, "learning_rate": 8.71575796422017e-06, "loss": 17.1963, "step": 14024 }, { "epoch": 0.2563657301625021, "grad_norm": 5.9749062755893485, "learning_rate": 8.715559889553496e-06, "loss": 17.4812, "step": 14025 }, { "epoch": 0.25638400935894856, "grad_norm": 6.635796194940061, "learning_rate": 8.715361801864107e-06, "loss": 17.6757, "step": 14026 }, { "epoch": 0.2564022885553951, "grad_norm": 6.332782098815111, "learning_rate": 8.715163701152698e-06, "loss": 17.6062, "step": 14027 }, { "epoch": 0.25642056775184163, "grad_norm": 6.569707572976014, "learning_rate": 8.714965587419964e-06, "loss": 17.7143, "step": 14028 }, { "epoch": 0.25643884694828817, "grad_norm": 7.926658291308322, "learning_rate": 8.714767460666595e-06, "loss": 18.3433, "step": 14029 }, { "epoch": 0.2564571261447347, "grad_norm": 6.703570270573447, "learning_rate": 8.71456932089329e-06, "loss": 17.3925, "step": 14030 }, { "epoch": 0.2564754053411812, "grad_norm": 8.7198142927402, "learning_rate": 8.714371168100742e-06, "loss": 18.5173, "step": 14031 }, { "epoch": 0.2564936845376277, "grad_norm": 6.5711447764108, "learning_rate": 8.714173002289645e-06, "loss": 17.5438, "step": 14032 }, { "epoch": 0.25651196373407426, "grad_norm": 6.498513708490086, "learning_rate": 8.713974823460693e-06, "loss": 17.5866, "step": 14033 }, { "epoch": 0.2565302429305208, "grad_norm": 6.707410223345111, "learning_rate": 8.713776631614583e-06, "loss": 17.7918, "step": 14034 }, { "epoch": 0.25654852212696727, "grad_norm": 7.497156527708566, "learning_rate": 8.71357842675201e-06, "loss": 17.8623, "step": 14035 }, { "epoch": 0.2565668013234138, "grad_norm": 6.806956228926704, "learning_rate": 8.713380208873663e-06, "loss": 17.7035, "step": 14036 }, { "epoch": 0.25658508051986034, "grad_norm": 6.737806666072355, "learning_rate": 8.713181977980242e-06, "loss": 17.9184, "step": 14037 }, { "epoch": 0.2566033597163069, "grad_norm": 5.857737233930091, "learning_rate": 8.712983734072442e-06, "loss": 17.3217, "step": 14038 }, { "epoch": 0.2566216389127534, "grad_norm": 6.127108906197689, "learning_rate": 8.712785477150954e-06, "loss": 17.6265, "step": 14039 }, { "epoch": 0.2566399181091999, "grad_norm": 5.505189658724705, "learning_rate": 8.712587207216476e-06, "loss": 17.1689, "step": 14040 }, { "epoch": 0.25665819730564643, "grad_norm": 5.2633515517705955, "learning_rate": 8.712388924269701e-06, "loss": 17.2424, "step": 14041 }, { "epoch": 0.25667647650209297, "grad_norm": 7.385141693929254, "learning_rate": 8.712190628311327e-06, "loss": 17.9504, "step": 14042 }, { "epoch": 0.2566947556985395, "grad_norm": 7.28567741353326, "learning_rate": 8.711992319342047e-06, "loss": 17.8721, "step": 14043 }, { "epoch": 0.25671303489498604, "grad_norm": 7.0865725746620365, "learning_rate": 8.711793997362555e-06, "loss": 17.5467, "step": 14044 }, { "epoch": 0.2567313140914325, "grad_norm": 6.524143900895566, "learning_rate": 8.711595662373545e-06, "loss": 17.818, "step": 14045 }, { "epoch": 0.25674959328787905, "grad_norm": 7.689675182264577, "learning_rate": 8.711397314375717e-06, "loss": 17.8134, "step": 14046 }, { "epoch": 0.2567678724843256, "grad_norm": 7.314549210631258, "learning_rate": 8.711198953369763e-06, "loss": 17.7351, "step": 14047 }, { "epoch": 0.2567861516807721, "grad_norm": 7.349427702300283, "learning_rate": 8.711000579356379e-06, "loss": 17.9681, "step": 14048 }, { "epoch": 0.25680443087721866, "grad_norm": 5.522520021641442, "learning_rate": 8.710802192336258e-06, "loss": 17.2265, "step": 14049 }, { "epoch": 0.25682271007366514, "grad_norm": 6.809408078333556, "learning_rate": 8.7106037923101e-06, "loss": 17.8225, "step": 14050 }, { "epoch": 0.2568409892701117, "grad_norm": 5.658643531266042, "learning_rate": 8.710405379278597e-06, "loss": 17.1175, "step": 14051 }, { "epoch": 0.2568592684665582, "grad_norm": 7.664486081417579, "learning_rate": 8.710206953242444e-06, "loss": 18.2874, "step": 14052 }, { "epoch": 0.25687754766300475, "grad_norm": 6.86069447753021, "learning_rate": 8.710008514202336e-06, "loss": 17.63, "step": 14053 }, { "epoch": 0.2568958268594513, "grad_norm": 6.8516514476035715, "learning_rate": 8.709810062158974e-06, "loss": 17.5871, "step": 14054 }, { "epoch": 0.25691410605589776, "grad_norm": 6.834744715769432, "learning_rate": 8.709611597113048e-06, "loss": 17.7219, "step": 14055 }, { "epoch": 0.2569323852523443, "grad_norm": 6.19308693771849, "learning_rate": 8.709413119065255e-06, "loss": 17.4019, "step": 14056 }, { "epoch": 0.25695066444879083, "grad_norm": 5.851593282103616, "learning_rate": 8.709214628016292e-06, "loss": 17.3128, "step": 14057 }, { "epoch": 0.25696894364523737, "grad_norm": 6.868437862739134, "learning_rate": 8.709016123966851e-06, "loss": 17.3939, "step": 14058 }, { "epoch": 0.2569872228416839, "grad_norm": 5.822625513677289, "learning_rate": 8.708817606917633e-06, "loss": 17.2102, "step": 14059 }, { "epoch": 0.2570055020381304, "grad_norm": 7.895151638643454, "learning_rate": 8.70861907686933e-06, "loss": 18.2374, "step": 14060 }, { "epoch": 0.2570237812345769, "grad_norm": 6.178522074785444, "learning_rate": 8.70842053382264e-06, "loss": 17.3785, "step": 14061 }, { "epoch": 0.25704206043102346, "grad_norm": 6.946304226628333, "learning_rate": 8.708221977778256e-06, "loss": 17.5099, "step": 14062 }, { "epoch": 0.25706033962747, "grad_norm": 7.167277756268976, "learning_rate": 8.708023408736877e-06, "loss": 17.826, "step": 14063 }, { "epoch": 0.2570786188239165, "grad_norm": 6.693399938414784, "learning_rate": 8.707824826699199e-06, "loss": 17.3958, "step": 14064 }, { "epoch": 0.257096898020363, "grad_norm": 6.101790113077589, "learning_rate": 8.707626231665914e-06, "loss": 17.3234, "step": 14065 }, { "epoch": 0.25711517721680954, "grad_norm": 6.464939958515629, "learning_rate": 8.70742762363772e-06, "loss": 17.7037, "step": 14066 }, { "epoch": 0.2571334564132561, "grad_norm": 6.746677057749509, "learning_rate": 8.707229002615317e-06, "loss": 17.8575, "step": 14067 }, { "epoch": 0.2571517356097026, "grad_norm": 6.621445967947263, "learning_rate": 8.707030368599398e-06, "loss": 17.5681, "step": 14068 }, { "epoch": 0.2571700148061491, "grad_norm": 6.322926138021842, "learning_rate": 8.706831721590657e-06, "loss": 17.5165, "step": 14069 }, { "epoch": 0.25718829400259563, "grad_norm": 7.287228708380956, "learning_rate": 8.706633061589794e-06, "loss": 17.9319, "step": 14070 }, { "epoch": 0.25720657319904217, "grad_norm": 6.064790777506287, "learning_rate": 8.706434388597503e-06, "loss": 17.2462, "step": 14071 }, { "epoch": 0.2572248523954887, "grad_norm": 6.563810232322666, "learning_rate": 8.706235702614482e-06, "loss": 17.7225, "step": 14072 }, { "epoch": 0.25724313159193524, "grad_norm": 8.809152384498775, "learning_rate": 8.706037003641426e-06, "loss": 18.0658, "step": 14073 }, { "epoch": 0.2572614107883817, "grad_norm": 6.113233339539685, "learning_rate": 8.705838291679032e-06, "loss": 17.4984, "step": 14074 }, { "epoch": 0.25727968998482825, "grad_norm": 7.392843049875668, "learning_rate": 8.705639566727997e-06, "loss": 18.0276, "step": 14075 }, { "epoch": 0.2572979691812748, "grad_norm": 6.22602578544969, "learning_rate": 8.705440828789015e-06, "loss": 17.7045, "step": 14076 }, { "epoch": 0.2573162483777213, "grad_norm": 7.396244514051507, "learning_rate": 8.705242077862786e-06, "loss": 17.5306, "step": 14077 }, { "epoch": 0.25733452757416786, "grad_norm": 6.418514741742404, "learning_rate": 8.705043313950004e-06, "loss": 17.5961, "step": 14078 }, { "epoch": 0.25735280677061434, "grad_norm": 6.534272716960642, "learning_rate": 8.704844537051368e-06, "loss": 17.4244, "step": 14079 }, { "epoch": 0.2573710859670609, "grad_norm": 5.499315925609631, "learning_rate": 8.704645747167572e-06, "loss": 16.9318, "step": 14080 }, { "epoch": 0.2573893651635074, "grad_norm": 7.21959522451206, "learning_rate": 8.704446944299314e-06, "loss": 18.1147, "step": 14081 }, { "epoch": 0.25740764435995395, "grad_norm": 7.815593595945188, "learning_rate": 8.704248128447293e-06, "loss": 18.2904, "step": 14082 }, { "epoch": 0.2574259235564005, "grad_norm": 6.228024877521958, "learning_rate": 8.704049299612203e-06, "loss": 17.3302, "step": 14083 }, { "epoch": 0.25744420275284696, "grad_norm": 6.8981733996518235, "learning_rate": 8.70385045779474e-06, "loss": 17.8574, "step": 14084 }, { "epoch": 0.2574624819492935, "grad_norm": 7.958254587077419, "learning_rate": 8.703651602995605e-06, "loss": 18.485, "step": 14085 }, { "epoch": 0.25748076114574003, "grad_norm": 7.544062214252388, "learning_rate": 8.70345273521549e-06, "loss": 18.1102, "step": 14086 }, { "epoch": 0.25749904034218657, "grad_norm": 5.914422136425655, "learning_rate": 8.703253854455095e-06, "loss": 17.2718, "step": 14087 }, { "epoch": 0.2575173195386331, "grad_norm": 7.573257093818667, "learning_rate": 8.703054960715118e-06, "loss": 18.1964, "step": 14088 }, { "epoch": 0.2575355987350796, "grad_norm": 9.158315114811106, "learning_rate": 8.702856053996254e-06, "loss": 18.8402, "step": 14089 }, { "epoch": 0.2575538779315261, "grad_norm": 6.991139114717488, "learning_rate": 8.702657134299201e-06, "loss": 17.718, "step": 14090 }, { "epoch": 0.25757215712797266, "grad_norm": 7.629686502898165, "learning_rate": 8.702458201624657e-06, "loss": 17.9014, "step": 14091 }, { "epoch": 0.2575904363244192, "grad_norm": 5.873192405342047, "learning_rate": 8.702259255973315e-06, "loss": 17.5239, "step": 14092 }, { "epoch": 0.2576087155208657, "grad_norm": 8.36751721033347, "learning_rate": 8.70206029734588e-06, "loss": 17.4272, "step": 14093 }, { "epoch": 0.2576269947173122, "grad_norm": 6.438144667109233, "learning_rate": 8.701861325743043e-06, "loss": 17.3664, "step": 14094 }, { "epoch": 0.25764527391375874, "grad_norm": 5.985339058250906, "learning_rate": 8.701662341165502e-06, "loss": 17.5232, "step": 14095 }, { "epoch": 0.2576635531102053, "grad_norm": 7.386255272232896, "learning_rate": 8.701463343613957e-06, "loss": 17.9152, "step": 14096 }, { "epoch": 0.2576818323066518, "grad_norm": 6.83644269450758, "learning_rate": 8.701264333089104e-06, "loss": 17.8121, "step": 14097 }, { "epoch": 0.25770011150309835, "grad_norm": 6.551490461044074, "learning_rate": 8.70106530959164e-06, "loss": 17.7306, "step": 14098 }, { "epoch": 0.25771839069954483, "grad_norm": 7.642030726394805, "learning_rate": 8.700866273122264e-06, "loss": 18.1874, "step": 14099 }, { "epoch": 0.25773666989599137, "grad_norm": 6.977711470901913, "learning_rate": 8.700667223681672e-06, "loss": 17.7862, "step": 14100 }, { "epoch": 0.2577549490924379, "grad_norm": 7.207789253193888, "learning_rate": 8.700468161270563e-06, "loss": 17.9599, "step": 14101 }, { "epoch": 0.25777322828888444, "grad_norm": 6.095674515026532, "learning_rate": 8.700269085889634e-06, "loss": 17.4106, "step": 14102 }, { "epoch": 0.2577915074853309, "grad_norm": 7.4632469140407585, "learning_rate": 8.700069997539584e-06, "loss": 17.8782, "step": 14103 }, { "epoch": 0.25780978668177745, "grad_norm": 5.210845208316065, "learning_rate": 8.69987089622111e-06, "loss": 16.9404, "step": 14104 }, { "epoch": 0.257828065878224, "grad_norm": 8.190158864530341, "learning_rate": 8.699671781934907e-06, "loss": 18.2329, "step": 14105 }, { "epoch": 0.2578463450746705, "grad_norm": 8.530594742188688, "learning_rate": 8.699472654681677e-06, "loss": 18.5188, "step": 14106 }, { "epoch": 0.25786462427111706, "grad_norm": 5.757533253815192, "learning_rate": 8.699273514462116e-06, "loss": 17.2406, "step": 14107 }, { "epoch": 0.25788290346756354, "grad_norm": 7.059106984663106, "learning_rate": 8.699074361276923e-06, "loss": 17.765, "step": 14108 }, { "epoch": 0.2579011826640101, "grad_norm": 6.105087348487071, "learning_rate": 8.698875195126796e-06, "loss": 17.5095, "step": 14109 }, { "epoch": 0.2579194618604566, "grad_norm": 7.334075919128877, "learning_rate": 8.69867601601243e-06, "loss": 17.6755, "step": 14110 }, { "epoch": 0.25793774105690315, "grad_norm": 6.858709746550251, "learning_rate": 8.698476823934529e-06, "loss": 17.954, "step": 14111 }, { "epoch": 0.2579560202533497, "grad_norm": 5.552323518060006, "learning_rate": 8.698277618893784e-06, "loss": 17.2246, "step": 14112 }, { "epoch": 0.25797429944979616, "grad_norm": 7.004207031243522, "learning_rate": 8.6980784008909e-06, "loss": 17.7233, "step": 14113 }, { "epoch": 0.2579925786462427, "grad_norm": 6.8987513859118685, "learning_rate": 8.69787916992657e-06, "loss": 18.1197, "step": 14114 }, { "epoch": 0.25801085784268923, "grad_norm": 6.120743129137839, "learning_rate": 8.697679926001496e-06, "loss": 17.2854, "step": 14115 }, { "epoch": 0.25802913703913577, "grad_norm": 7.381495184158092, "learning_rate": 8.697480669116373e-06, "loss": 18.2839, "step": 14116 }, { "epoch": 0.2580474162355823, "grad_norm": 9.896775387834815, "learning_rate": 8.697281399271902e-06, "loss": 18.4107, "step": 14117 }, { "epoch": 0.2580656954320288, "grad_norm": 6.9585472563611654, "learning_rate": 8.69708211646878e-06, "loss": 17.7435, "step": 14118 }, { "epoch": 0.2580839746284753, "grad_norm": 8.22483594138805, "learning_rate": 8.696882820707708e-06, "loss": 18.5855, "step": 14119 }, { "epoch": 0.25810225382492186, "grad_norm": 7.280764723094892, "learning_rate": 8.696683511989381e-06, "loss": 17.8302, "step": 14120 }, { "epoch": 0.2581205330213684, "grad_norm": 8.321951653277008, "learning_rate": 8.696484190314498e-06, "loss": 18.6115, "step": 14121 }, { "epoch": 0.25813881221781493, "grad_norm": 6.6238999856305405, "learning_rate": 8.69628485568376e-06, "loss": 17.4643, "step": 14122 }, { "epoch": 0.2581570914142614, "grad_norm": 6.503710160939189, "learning_rate": 8.696085508097865e-06, "loss": 17.7514, "step": 14123 }, { "epoch": 0.25817537061070794, "grad_norm": 5.775511509425728, "learning_rate": 8.695886147557508e-06, "loss": 17.2477, "step": 14124 }, { "epoch": 0.2581936498071545, "grad_norm": 6.4818374067254485, "learning_rate": 8.695686774063394e-06, "loss": 17.3527, "step": 14125 }, { "epoch": 0.258211929003601, "grad_norm": 7.701902668089003, "learning_rate": 8.695487387616217e-06, "loss": 17.9372, "step": 14126 }, { "epoch": 0.25823020820004755, "grad_norm": 6.697959652546757, "learning_rate": 8.695287988216679e-06, "loss": 17.6515, "step": 14127 }, { "epoch": 0.25824848739649403, "grad_norm": 8.041435335901662, "learning_rate": 8.695088575865476e-06, "loss": 18.4181, "step": 14128 }, { "epoch": 0.25826676659294057, "grad_norm": 6.567403984090422, "learning_rate": 8.694889150563308e-06, "loss": 17.7741, "step": 14129 }, { "epoch": 0.2582850457893871, "grad_norm": 6.918198819549715, "learning_rate": 8.694689712310875e-06, "loss": 18.0391, "step": 14130 }, { "epoch": 0.25830332498583364, "grad_norm": 6.697927322725348, "learning_rate": 8.694490261108874e-06, "loss": 17.2621, "step": 14131 }, { "epoch": 0.2583216041822802, "grad_norm": 6.011527171912248, "learning_rate": 8.694290796958004e-06, "loss": 17.4086, "step": 14132 }, { "epoch": 0.25833988337872665, "grad_norm": 6.1745037575596475, "learning_rate": 8.694091319858968e-06, "loss": 17.41, "step": 14133 }, { "epoch": 0.2583581625751732, "grad_norm": 6.627972132134733, "learning_rate": 8.693891829812463e-06, "loss": 17.4986, "step": 14134 }, { "epoch": 0.2583764417716197, "grad_norm": 6.36915373828169, "learning_rate": 8.693692326819185e-06, "loss": 17.4596, "step": 14135 }, { "epoch": 0.25839472096806626, "grad_norm": 8.292436876517803, "learning_rate": 8.693492810879838e-06, "loss": 18.0977, "step": 14136 }, { "epoch": 0.25841300016451274, "grad_norm": 7.847096691742847, "learning_rate": 8.693293281995118e-06, "loss": 17.936, "step": 14137 }, { "epoch": 0.2584312793609593, "grad_norm": 6.133172170823049, "learning_rate": 8.693093740165725e-06, "loss": 17.4722, "step": 14138 }, { "epoch": 0.2584495585574058, "grad_norm": 8.876256817910992, "learning_rate": 8.69289418539236e-06, "loss": 18.5546, "step": 14139 }, { "epoch": 0.25846783775385235, "grad_norm": 6.003085546094814, "learning_rate": 8.692694617675721e-06, "loss": 17.5186, "step": 14140 }, { "epoch": 0.2584861169502989, "grad_norm": 6.202426404784945, "learning_rate": 8.692495037016509e-06, "loss": 17.2455, "step": 14141 }, { "epoch": 0.25850439614674536, "grad_norm": 8.097268470393931, "learning_rate": 8.692295443415422e-06, "loss": 17.5599, "step": 14142 }, { "epoch": 0.2585226753431919, "grad_norm": 5.829988422698184, "learning_rate": 8.692095836873159e-06, "loss": 17.1431, "step": 14143 }, { "epoch": 0.25854095453963843, "grad_norm": 6.237348241267024, "learning_rate": 8.691896217390421e-06, "loss": 17.3951, "step": 14144 }, { "epoch": 0.25855923373608497, "grad_norm": 6.28420789545548, "learning_rate": 8.691696584967905e-06, "loss": 17.5018, "step": 14145 }, { "epoch": 0.2585775129325315, "grad_norm": 6.371079497773243, "learning_rate": 8.691496939606315e-06, "loss": 17.581, "step": 14146 }, { "epoch": 0.258595792128978, "grad_norm": 5.7376703639207705, "learning_rate": 8.69129728130635e-06, "loss": 17.3224, "step": 14147 }, { "epoch": 0.2586140713254245, "grad_norm": 6.198038893127131, "learning_rate": 8.691097610068705e-06, "loss": 17.3983, "step": 14148 }, { "epoch": 0.25863235052187106, "grad_norm": 6.718181695319792, "learning_rate": 8.690897925894085e-06, "loss": 17.2986, "step": 14149 }, { "epoch": 0.2586506297183176, "grad_norm": 6.42715301445234, "learning_rate": 8.690698228783188e-06, "loss": 17.5038, "step": 14150 }, { "epoch": 0.25866890891476413, "grad_norm": 6.500218420373457, "learning_rate": 8.690498518736715e-06, "loss": 17.7886, "step": 14151 }, { "epoch": 0.2586871881112106, "grad_norm": 5.362641201212311, "learning_rate": 8.690298795755362e-06, "loss": 16.9478, "step": 14152 }, { "epoch": 0.25870546730765714, "grad_norm": 9.544536326229629, "learning_rate": 8.690099059839834e-06, "loss": 18.2867, "step": 14153 }, { "epoch": 0.2587237465041037, "grad_norm": 7.370924052965017, "learning_rate": 8.689899310990828e-06, "loss": 18.1317, "step": 14154 }, { "epoch": 0.2587420257005502, "grad_norm": 8.413257304783276, "learning_rate": 8.689699549209046e-06, "loss": 18.2081, "step": 14155 }, { "epoch": 0.25876030489699675, "grad_norm": 5.693286958941834, "learning_rate": 8.689499774495186e-06, "loss": 17.1675, "step": 14156 }, { "epoch": 0.25877858409344323, "grad_norm": 6.486964766282015, "learning_rate": 8.689299986849952e-06, "loss": 17.5089, "step": 14157 }, { "epoch": 0.25879686328988977, "grad_norm": 7.348963029950461, "learning_rate": 8.68910018627404e-06, "loss": 18.0337, "step": 14158 }, { "epoch": 0.2588151424863363, "grad_norm": 5.7470422693526, "learning_rate": 8.688900372768152e-06, "loss": 17.1518, "step": 14159 }, { "epoch": 0.25883342168278284, "grad_norm": 7.724256889793154, "learning_rate": 8.688700546332989e-06, "loss": 18.0078, "step": 14160 }, { "epoch": 0.2588517008792294, "grad_norm": 7.23284061844154, "learning_rate": 8.68850070696925e-06, "loss": 18.124, "step": 14161 }, { "epoch": 0.25886998007567585, "grad_norm": 6.394090609045597, "learning_rate": 8.688300854677636e-06, "loss": 17.4614, "step": 14162 }, { "epoch": 0.2588882592721224, "grad_norm": 6.993271961062448, "learning_rate": 8.688100989458848e-06, "loss": 17.6922, "step": 14163 }, { "epoch": 0.2589065384685689, "grad_norm": 6.712416485913673, "learning_rate": 8.687901111313587e-06, "loss": 17.6312, "step": 14164 }, { "epoch": 0.25892481766501546, "grad_norm": 7.060217217080197, "learning_rate": 8.68770122024255e-06, "loss": 17.6092, "step": 14165 }, { "epoch": 0.258943096861462, "grad_norm": 6.572287768613955, "learning_rate": 8.687501316246441e-06, "loss": 17.6795, "step": 14166 }, { "epoch": 0.2589613760579085, "grad_norm": 6.32235361034704, "learning_rate": 8.68730139932596e-06, "loss": 17.4758, "step": 14167 }, { "epoch": 0.258979655254355, "grad_norm": 6.795511266468757, "learning_rate": 8.687101469481809e-06, "loss": 17.9259, "step": 14168 }, { "epoch": 0.25899793445080155, "grad_norm": 6.103485003033015, "learning_rate": 8.686901526714686e-06, "loss": 17.1723, "step": 14169 }, { "epoch": 0.2590162136472481, "grad_norm": 8.167025485819599, "learning_rate": 8.686701571025293e-06, "loss": 18.2686, "step": 14170 }, { "epoch": 0.25903449284369456, "grad_norm": 8.928956404945458, "learning_rate": 8.686501602414332e-06, "loss": 18.4698, "step": 14171 }, { "epoch": 0.2590527720401411, "grad_norm": 6.538180948003185, "learning_rate": 8.686301620882502e-06, "loss": 17.5651, "step": 14172 }, { "epoch": 0.25907105123658764, "grad_norm": 8.051077866943555, "learning_rate": 8.686101626430505e-06, "loss": 18.3328, "step": 14173 }, { "epoch": 0.25908933043303417, "grad_norm": 7.46434567692638, "learning_rate": 8.685901619059041e-06, "loss": 17.6777, "step": 14174 }, { "epoch": 0.2591076096294807, "grad_norm": 6.7844431445493365, "learning_rate": 8.685701598768813e-06, "loss": 18.0084, "step": 14175 }, { "epoch": 0.2591258888259272, "grad_norm": 7.663998584431132, "learning_rate": 8.685501565560519e-06, "loss": 18.1988, "step": 14176 }, { "epoch": 0.2591441680223737, "grad_norm": 5.93982789782059, "learning_rate": 8.685301519434863e-06, "loss": 17.4027, "step": 14177 }, { "epoch": 0.25916244721882026, "grad_norm": 6.029004675891996, "learning_rate": 8.685101460392545e-06, "loss": 17.2887, "step": 14178 }, { "epoch": 0.2591807264152668, "grad_norm": 9.013383911709616, "learning_rate": 8.684901388434266e-06, "loss": 17.7601, "step": 14179 }, { "epoch": 0.25919900561171333, "grad_norm": 7.1253852801615984, "learning_rate": 8.684701303560728e-06, "loss": 17.8046, "step": 14180 }, { "epoch": 0.2592172848081598, "grad_norm": 6.100035498801834, "learning_rate": 8.68450120577263e-06, "loss": 17.3895, "step": 14181 }, { "epoch": 0.25923556400460634, "grad_norm": 5.189228718505733, "learning_rate": 8.684301095070677e-06, "loss": 17.1329, "step": 14182 }, { "epoch": 0.2592538432010529, "grad_norm": 6.665781143457299, "learning_rate": 8.684100971455567e-06, "loss": 17.7856, "step": 14183 }, { "epoch": 0.2592721223974994, "grad_norm": 7.47350467978439, "learning_rate": 8.683900834928003e-06, "loss": 18.08, "step": 14184 }, { "epoch": 0.25929040159394595, "grad_norm": 7.525122210015121, "learning_rate": 8.683700685488687e-06, "loss": 18.0142, "step": 14185 }, { "epoch": 0.25930868079039243, "grad_norm": 6.440948791857707, "learning_rate": 8.683500523138318e-06, "loss": 17.8687, "step": 14186 }, { "epoch": 0.25932695998683897, "grad_norm": 6.380271622063288, "learning_rate": 8.6833003478776e-06, "loss": 17.5271, "step": 14187 }, { "epoch": 0.2593452391832855, "grad_norm": 6.1840804899448765, "learning_rate": 8.683100159707235e-06, "loss": 17.2603, "step": 14188 }, { "epoch": 0.25936351837973204, "grad_norm": 7.107260335076593, "learning_rate": 8.682899958627922e-06, "loss": 18.0198, "step": 14189 }, { "epoch": 0.2593817975761786, "grad_norm": 7.361694465003641, "learning_rate": 8.682699744640365e-06, "loss": 17.6505, "step": 14190 }, { "epoch": 0.25940007677262505, "grad_norm": 9.544649755749758, "learning_rate": 8.682499517745266e-06, "loss": 18.7415, "step": 14191 }, { "epoch": 0.2594183559690716, "grad_norm": 7.23844238396064, "learning_rate": 8.682299277943325e-06, "loss": 17.8093, "step": 14192 }, { "epoch": 0.2594366351655181, "grad_norm": 7.331660525820289, "learning_rate": 8.682099025235244e-06, "loss": 17.6294, "step": 14193 }, { "epoch": 0.25945491436196466, "grad_norm": 6.918174831241119, "learning_rate": 8.681898759621726e-06, "loss": 17.565, "step": 14194 }, { "epoch": 0.2594731935584112, "grad_norm": 7.533312081848028, "learning_rate": 8.681698481103472e-06, "loss": 17.773, "step": 14195 }, { "epoch": 0.2594914727548577, "grad_norm": 6.6700859081693205, "learning_rate": 8.681498189681185e-06, "loss": 17.4458, "step": 14196 }, { "epoch": 0.2595097519513042, "grad_norm": 6.484848541244922, "learning_rate": 8.681297885355567e-06, "loss": 17.4729, "step": 14197 }, { "epoch": 0.25952803114775075, "grad_norm": 9.179260736816243, "learning_rate": 8.681097568127316e-06, "loss": 18.4558, "step": 14198 }, { "epoch": 0.2595463103441973, "grad_norm": 6.965157906115924, "learning_rate": 8.680897237997141e-06, "loss": 17.6901, "step": 14199 }, { "epoch": 0.2595645895406438, "grad_norm": 7.252637507695194, "learning_rate": 8.68069689496574e-06, "loss": 18.0804, "step": 14200 }, { "epoch": 0.2595828687370903, "grad_norm": 8.236373944975917, "learning_rate": 8.680496539033814e-06, "loss": 17.9463, "step": 14201 }, { "epoch": 0.25960114793353684, "grad_norm": 7.359481017944861, "learning_rate": 8.680296170202066e-06, "loss": 17.9905, "step": 14202 }, { "epoch": 0.25961942712998337, "grad_norm": 6.535176048230325, "learning_rate": 8.680095788471202e-06, "loss": 17.5322, "step": 14203 }, { "epoch": 0.2596377063264299, "grad_norm": 6.628991932210777, "learning_rate": 8.679895393841922e-06, "loss": 17.411, "step": 14204 }, { "epoch": 0.2596559855228764, "grad_norm": 6.523055493984605, "learning_rate": 8.679694986314926e-06, "loss": 17.4709, "step": 14205 }, { "epoch": 0.2596742647193229, "grad_norm": 7.371541668892755, "learning_rate": 8.67949456589092e-06, "loss": 17.7989, "step": 14206 }, { "epoch": 0.25969254391576946, "grad_norm": 6.085751981455777, "learning_rate": 8.679294132570604e-06, "loss": 17.2445, "step": 14207 }, { "epoch": 0.259710823112216, "grad_norm": 5.324822737500185, "learning_rate": 8.67909368635468e-06, "loss": 16.9973, "step": 14208 }, { "epoch": 0.25972910230866253, "grad_norm": 6.077237755113356, "learning_rate": 8.678893227243854e-06, "loss": 17.3057, "step": 14209 }, { "epoch": 0.259747381505109, "grad_norm": 6.874065151832448, "learning_rate": 8.678692755238827e-06, "loss": 17.8281, "step": 14210 }, { "epoch": 0.25976566070155555, "grad_norm": 6.495768176593526, "learning_rate": 8.678492270340299e-06, "loss": 17.7346, "step": 14211 }, { "epoch": 0.2597839398980021, "grad_norm": 7.387890247946272, "learning_rate": 8.678291772548975e-06, "loss": 17.9833, "step": 14212 }, { "epoch": 0.2598022190944486, "grad_norm": 6.996674065804481, "learning_rate": 8.67809126186556e-06, "loss": 17.7884, "step": 14213 }, { "epoch": 0.25982049829089515, "grad_norm": 6.281673670852194, "learning_rate": 8.67789073829075e-06, "loss": 17.3121, "step": 14214 }, { "epoch": 0.25983877748734163, "grad_norm": 7.452836805760374, "learning_rate": 8.677690201825255e-06, "loss": 17.789, "step": 14215 }, { "epoch": 0.25985705668378817, "grad_norm": 5.957035006205307, "learning_rate": 8.677489652469775e-06, "loss": 17.4055, "step": 14216 }, { "epoch": 0.2598753358802347, "grad_norm": 6.3458021800711615, "learning_rate": 8.677289090225013e-06, "loss": 17.3333, "step": 14217 }, { "epoch": 0.25989361507668124, "grad_norm": 6.118127226736724, "learning_rate": 8.677088515091671e-06, "loss": 17.3757, "step": 14218 }, { "epoch": 0.2599118942731278, "grad_norm": 6.751555649206739, "learning_rate": 8.676887927070453e-06, "loss": 17.5914, "step": 14219 }, { "epoch": 0.25993017346957425, "grad_norm": 6.897398110608393, "learning_rate": 8.676687326162063e-06, "loss": 17.4974, "step": 14220 }, { "epoch": 0.2599484526660208, "grad_norm": 5.837874375748976, "learning_rate": 8.676486712367201e-06, "loss": 17.2377, "step": 14221 }, { "epoch": 0.2599667318624673, "grad_norm": 7.752103161333534, "learning_rate": 8.676286085686573e-06, "loss": 18.2379, "step": 14222 }, { "epoch": 0.25998501105891386, "grad_norm": 5.741388568907354, "learning_rate": 8.676085446120882e-06, "loss": 17.176, "step": 14223 }, { "epoch": 0.2600032902553604, "grad_norm": 7.389644836119392, "learning_rate": 8.67588479367083e-06, "loss": 17.7166, "step": 14224 }, { "epoch": 0.2600215694518069, "grad_norm": 6.8645852808263825, "learning_rate": 8.675684128337121e-06, "loss": 17.7229, "step": 14225 }, { "epoch": 0.2600398486482534, "grad_norm": 6.655345409236313, "learning_rate": 8.675483450120456e-06, "loss": 17.732, "step": 14226 }, { "epoch": 0.26005812784469995, "grad_norm": 6.494098822622264, "learning_rate": 8.675282759021544e-06, "loss": 17.2915, "step": 14227 }, { "epoch": 0.2600764070411465, "grad_norm": 6.522231857490047, "learning_rate": 8.675082055041082e-06, "loss": 17.5535, "step": 14228 }, { "epoch": 0.260094686237593, "grad_norm": 6.703443328300827, "learning_rate": 8.674881338179778e-06, "loss": 17.7362, "step": 14229 }, { "epoch": 0.2601129654340395, "grad_norm": 7.665242439256601, "learning_rate": 8.674680608438332e-06, "loss": 17.1427, "step": 14230 }, { "epoch": 0.26013124463048604, "grad_norm": 5.979924565427676, "learning_rate": 8.674479865817452e-06, "loss": 17.3177, "step": 14231 }, { "epoch": 0.26014952382693257, "grad_norm": 6.298123909968524, "learning_rate": 8.674279110317837e-06, "loss": 17.3941, "step": 14232 }, { "epoch": 0.2601678030233791, "grad_norm": 6.134596837049285, "learning_rate": 8.674078341940193e-06, "loss": 17.3473, "step": 14233 }, { "epoch": 0.26018608221982564, "grad_norm": 6.690030188988819, "learning_rate": 8.673877560685225e-06, "loss": 17.6278, "step": 14234 }, { "epoch": 0.2602043614162721, "grad_norm": 7.04849350575925, "learning_rate": 8.673676766553632e-06, "loss": 17.5964, "step": 14235 }, { "epoch": 0.26022264061271866, "grad_norm": 5.807512367297847, "learning_rate": 8.673475959546122e-06, "loss": 17.1041, "step": 14236 }, { "epoch": 0.2602409198091652, "grad_norm": 7.804949548570082, "learning_rate": 8.673275139663397e-06, "loss": 18.0493, "step": 14237 }, { "epoch": 0.26025919900561173, "grad_norm": 5.631732771120599, "learning_rate": 8.673074306906162e-06, "loss": 17.1282, "step": 14238 }, { "epoch": 0.2602774782020582, "grad_norm": 6.623288987579542, "learning_rate": 8.67287346127512e-06, "loss": 17.4896, "step": 14239 }, { "epoch": 0.26029575739850475, "grad_norm": 7.451615977421559, "learning_rate": 8.672672602770976e-06, "loss": 17.6082, "step": 14240 }, { "epoch": 0.2603140365949513, "grad_norm": 6.66514506670663, "learning_rate": 8.672471731394432e-06, "loss": 17.5331, "step": 14241 }, { "epoch": 0.2603323157913978, "grad_norm": 6.899726283753662, "learning_rate": 8.672270847146193e-06, "loss": 17.6937, "step": 14242 }, { "epoch": 0.26035059498784435, "grad_norm": 7.7005808922718115, "learning_rate": 8.672069950026964e-06, "loss": 18.1453, "step": 14243 }, { "epoch": 0.26036887418429083, "grad_norm": 6.37171306374178, "learning_rate": 8.67186904003745e-06, "loss": 17.5089, "step": 14244 }, { "epoch": 0.26038715338073737, "grad_norm": 7.908325408627519, "learning_rate": 8.671668117178351e-06, "loss": 18.2844, "step": 14245 }, { "epoch": 0.2604054325771839, "grad_norm": 5.920169292135648, "learning_rate": 8.671467181450376e-06, "loss": 17.1884, "step": 14246 }, { "epoch": 0.26042371177363044, "grad_norm": 6.898063108802871, "learning_rate": 8.671266232854227e-06, "loss": 17.935, "step": 14247 }, { "epoch": 0.260441990970077, "grad_norm": 6.529362513605127, "learning_rate": 8.671065271390606e-06, "loss": 17.6221, "step": 14248 }, { "epoch": 0.26046027016652346, "grad_norm": 7.228320070578015, "learning_rate": 8.670864297060223e-06, "loss": 18.1665, "step": 14249 }, { "epoch": 0.26047854936297, "grad_norm": 5.948830610352112, "learning_rate": 8.670663309863778e-06, "loss": 17.3457, "step": 14250 }, { "epoch": 0.2604968285594165, "grad_norm": 6.602771669230802, "learning_rate": 8.670462309801976e-06, "loss": 17.6515, "step": 14251 }, { "epoch": 0.26051510775586306, "grad_norm": 7.077321993934937, "learning_rate": 8.670261296875521e-06, "loss": 17.8299, "step": 14252 }, { "epoch": 0.2605333869523096, "grad_norm": 4.706703887188802, "learning_rate": 8.670060271085122e-06, "loss": 16.854, "step": 14253 }, { "epoch": 0.2605516661487561, "grad_norm": 6.60047464851554, "learning_rate": 8.669859232431478e-06, "loss": 17.4978, "step": 14254 }, { "epoch": 0.2605699453452026, "grad_norm": 5.9323500391190285, "learning_rate": 8.669658180915295e-06, "loss": 17.3478, "step": 14255 }, { "epoch": 0.26058822454164915, "grad_norm": 6.901721871946896, "learning_rate": 8.669457116537281e-06, "loss": 17.6855, "step": 14256 }, { "epoch": 0.2606065037380957, "grad_norm": 6.578632990964375, "learning_rate": 8.669256039298136e-06, "loss": 17.3722, "step": 14257 }, { "epoch": 0.2606247829345422, "grad_norm": 5.39057658087867, "learning_rate": 8.669054949198567e-06, "loss": 16.9551, "step": 14258 }, { "epoch": 0.2606430621309887, "grad_norm": 6.211108870611746, "learning_rate": 8.66885384623928e-06, "loss": 17.3891, "step": 14259 }, { "epoch": 0.26066134132743524, "grad_norm": 6.384579728924579, "learning_rate": 8.668652730420979e-06, "loss": 17.6692, "step": 14260 }, { "epoch": 0.26067962052388177, "grad_norm": 5.313832268782017, "learning_rate": 8.668451601744367e-06, "loss": 16.9212, "step": 14261 }, { "epoch": 0.2606978997203283, "grad_norm": 6.327507524674901, "learning_rate": 8.668250460210152e-06, "loss": 17.2745, "step": 14262 }, { "epoch": 0.26071617891677484, "grad_norm": 7.276705564390637, "learning_rate": 8.668049305819038e-06, "loss": 17.9003, "step": 14263 }, { "epoch": 0.2607344581132213, "grad_norm": 6.954134425200045, "learning_rate": 8.667848138571727e-06, "loss": 17.9699, "step": 14264 }, { "epoch": 0.26075273730966786, "grad_norm": 6.360331324137992, "learning_rate": 8.667646958468928e-06, "loss": 17.4674, "step": 14265 }, { "epoch": 0.2607710165061144, "grad_norm": 6.5458612551608555, "learning_rate": 8.667445765511345e-06, "loss": 17.5799, "step": 14266 }, { "epoch": 0.26078929570256093, "grad_norm": 6.615452750596066, "learning_rate": 8.66724455969968e-06, "loss": 17.5238, "step": 14267 }, { "epoch": 0.26080757489900747, "grad_norm": 6.133766441116057, "learning_rate": 8.667043341034646e-06, "loss": 17.353, "step": 14268 }, { "epoch": 0.26082585409545395, "grad_norm": 6.822704766326976, "learning_rate": 8.66684210951694e-06, "loss": 17.8244, "step": 14269 }, { "epoch": 0.2608441332919005, "grad_norm": 6.510204609711557, "learning_rate": 8.66664086514727e-06, "loss": 17.4648, "step": 14270 }, { "epoch": 0.260862412488347, "grad_norm": 7.596246445747984, "learning_rate": 8.666439607926345e-06, "loss": 17.7896, "step": 14271 }, { "epoch": 0.26088069168479355, "grad_norm": 5.235495628093616, "learning_rate": 8.666238337854864e-06, "loss": 16.7636, "step": 14272 }, { "epoch": 0.26089897088124003, "grad_norm": 7.000231983092996, "learning_rate": 8.666037054933538e-06, "loss": 17.706, "step": 14273 }, { "epoch": 0.26091725007768657, "grad_norm": 6.577012523913233, "learning_rate": 8.66583575916307e-06, "loss": 17.6584, "step": 14274 }, { "epoch": 0.2609355292741331, "grad_norm": 6.546348910698961, "learning_rate": 8.665634450544165e-06, "loss": 17.2979, "step": 14275 }, { "epoch": 0.26095380847057964, "grad_norm": 6.821977935311509, "learning_rate": 8.66543312907753e-06, "loss": 17.4793, "step": 14276 }, { "epoch": 0.2609720876670262, "grad_norm": 7.523576969932582, "learning_rate": 8.665231794763868e-06, "loss": 18.2738, "step": 14277 }, { "epoch": 0.26099036686347266, "grad_norm": 5.151727795926751, "learning_rate": 8.66503044760389e-06, "loss": 16.9207, "step": 14278 }, { "epoch": 0.2610086460599192, "grad_norm": 6.743005251847526, "learning_rate": 8.664829087598297e-06, "loss": 17.8879, "step": 14279 }, { "epoch": 0.2610269252563657, "grad_norm": 7.27416254634992, "learning_rate": 8.664627714747796e-06, "loss": 17.6607, "step": 14280 }, { "epoch": 0.26104520445281226, "grad_norm": 5.7338638288078725, "learning_rate": 8.66442632905309e-06, "loss": 17.0997, "step": 14281 }, { "epoch": 0.2610634836492588, "grad_norm": 6.238847600389716, "learning_rate": 8.664224930514891e-06, "loss": 17.543, "step": 14282 }, { "epoch": 0.2610817628457053, "grad_norm": 6.262960496814813, "learning_rate": 8.664023519133901e-06, "loss": 17.1668, "step": 14283 }, { "epoch": 0.2611000420421518, "grad_norm": 6.312394038715761, "learning_rate": 8.663822094910826e-06, "loss": 17.2952, "step": 14284 }, { "epoch": 0.26111832123859835, "grad_norm": 7.314451336858432, "learning_rate": 8.663620657846372e-06, "loss": 17.9633, "step": 14285 }, { "epoch": 0.2611366004350449, "grad_norm": 7.068644350015441, "learning_rate": 8.663419207941248e-06, "loss": 17.6935, "step": 14286 }, { "epoch": 0.2611548796314914, "grad_norm": 6.731138266776832, "learning_rate": 8.663217745196155e-06, "loss": 17.6665, "step": 14287 }, { "epoch": 0.2611731588279379, "grad_norm": 7.2949333980703726, "learning_rate": 8.663016269611802e-06, "loss": 17.6784, "step": 14288 }, { "epoch": 0.26119143802438444, "grad_norm": 6.200160114403457, "learning_rate": 8.662814781188896e-06, "loss": 17.2703, "step": 14289 }, { "epoch": 0.261209717220831, "grad_norm": 7.511082518027258, "learning_rate": 8.662613279928141e-06, "loss": 17.5261, "step": 14290 }, { "epoch": 0.2612279964172775, "grad_norm": 5.669264255267169, "learning_rate": 8.662411765830245e-06, "loss": 17.1438, "step": 14291 }, { "epoch": 0.26124627561372404, "grad_norm": 5.7018480564985365, "learning_rate": 8.662210238895914e-06, "loss": 17.0191, "step": 14292 }, { "epoch": 0.2612645548101705, "grad_norm": 7.153404570753518, "learning_rate": 8.662008699125853e-06, "loss": 17.5494, "step": 14293 }, { "epoch": 0.26128283400661706, "grad_norm": 5.644824221333599, "learning_rate": 8.661807146520769e-06, "loss": 17.2817, "step": 14294 }, { "epoch": 0.2613011132030636, "grad_norm": 6.488183206471269, "learning_rate": 8.661605581081369e-06, "loss": 17.728, "step": 14295 }, { "epoch": 0.26131939239951013, "grad_norm": 6.725374470937904, "learning_rate": 8.66140400280836e-06, "loss": 17.5082, "step": 14296 }, { "epoch": 0.26133767159595667, "grad_norm": 6.779234937895279, "learning_rate": 8.661202411702448e-06, "loss": 17.4111, "step": 14297 }, { "epoch": 0.26135595079240315, "grad_norm": 7.621404763034471, "learning_rate": 8.661000807764338e-06, "loss": 17.9106, "step": 14298 }, { "epoch": 0.2613742299888497, "grad_norm": 6.2451423751081405, "learning_rate": 8.660799190994738e-06, "loss": 17.3519, "step": 14299 }, { "epoch": 0.2613925091852962, "grad_norm": 6.7033198164479, "learning_rate": 8.660597561394354e-06, "loss": 17.8168, "step": 14300 }, { "epoch": 0.26141078838174275, "grad_norm": 5.580826480366983, "learning_rate": 8.660395918963894e-06, "loss": 17.1418, "step": 14301 }, { "epoch": 0.2614290675781893, "grad_norm": 8.43652772775704, "learning_rate": 8.660194263704063e-06, "loss": 18.2843, "step": 14302 }, { "epoch": 0.26144734677463577, "grad_norm": 8.126025296361352, "learning_rate": 8.659992595615569e-06, "loss": 18.0668, "step": 14303 }, { "epoch": 0.2614656259710823, "grad_norm": 9.150277131661936, "learning_rate": 8.65979091469912e-06, "loss": 18.4163, "step": 14304 }, { "epoch": 0.26148390516752884, "grad_norm": 9.556173073928285, "learning_rate": 8.65958922095542e-06, "loss": 18.7502, "step": 14305 }, { "epoch": 0.2615021843639754, "grad_norm": 6.128587431759308, "learning_rate": 8.659387514385179e-06, "loss": 17.253, "step": 14306 }, { "epoch": 0.26152046356042186, "grad_norm": 6.943525255298647, "learning_rate": 8.6591857949891e-06, "loss": 17.7416, "step": 14307 }, { "epoch": 0.2615387427568684, "grad_norm": 6.444330711899049, "learning_rate": 8.658984062767893e-06, "loss": 17.4775, "step": 14308 }, { "epoch": 0.2615570219533149, "grad_norm": 8.10083039055212, "learning_rate": 8.658782317722265e-06, "loss": 18.2508, "step": 14309 }, { "epoch": 0.26157530114976146, "grad_norm": 6.220214814321148, "learning_rate": 8.658580559852922e-06, "loss": 17.3301, "step": 14310 }, { "epoch": 0.261593580346208, "grad_norm": 7.169790426415843, "learning_rate": 8.658378789160573e-06, "loss": 17.8293, "step": 14311 }, { "epoch": 0.2616118595426545, "grad_norm": 6.650129065417017, "learning_rate": 8.658177005645922e-06, "loss": 17.2107, "step": 14312 }, { "epoch": 0.261630138739101, "grad_norm": 5.8399668845782955, "learning_rate": 8.657975209309679e-06, "loss": 17.4181, "step": 14313 }, { "epoch": 0.26164841793554755, "grad_norm": 6.8481940107929455, "learning_rate": 8.657773400152549e-06, "loss": 17.5672, "step": 14314 }, { "epoch": 0.2616666971319941, "grad_norm": 6.905469070345408, "learning_rate": 8.657571578175243e-06, "loss": 18.1343, "step": 14315 }, { "epoch": 0.2616849763284406, "grad_norm": 6.860346363177648, "learning_rate": 8.657369743378464e-06, "loss": 17.866, "step": 14316 }, { "epoch": 0.2617032555248871, "grad_norm": 6.535592953025446, "learning_rate": 8.657167895762923e-06, "loss": 17.4644, "step": 14317 }, { "epoch": 0.26172153472133364, "grad_norm": 7.669681712929007, "learning_rate": 8.656966035329325e-06, "loss": 17.6827, "step": 14318 }, { "epoch": 0.2617398139177802, "grad_norm": 5.8820288919884876, "learning_rate": 8.656764162078377e-06, "loss": 17.1236, "step": 14319 }, { "epoch": 0.2617580931142267, "grad_norm": 6.043916132868095, "learning_rate": 8.65656227601079e-06, "loss": 17.5664, "step": 14320 }, { "epoch": 0.26177637231067324, "grad_norm": 7.263720419922502, "learning_rate": 8.656360377127269e-06, "loss": 18.2175, "step": 14321 }, { "epoch": 0.2617946515071197, "grad_norm": 8.64963241796326, "learning_rate": 8.656158465428523e-06, "loss": 17.9681, "step": 14322 }, { "epoch": 0.26181293070356626, "grad_norm": 5.8059009438770515, "learning_rate": 8.655956540915256e-06, "loss": 17.4548, "step": 14323 }, { "epoch": 0.2618312099000128, "grad_norm": 5.79451611257415, "learning_rate": 8.65575460358818e-06, "loss": 17.3694, "step": 14324 }, { "epoch": 0.26184948909645933, "grad_norm": 6.402302482213285, "learning_rate": 8.655552653448003e-06, "loss": 17.8392, "step": 14325 }, { "epoch": 0.26186776829290587, "grad_norm": 8.61783076396563, "learning_rate": 8.65535069049543e-06, "loss": 18.7223, "step": 14326 }, { "epoch": 0.26188604748935235, "grad_norm": 8.587924319046229, "learning_rate": 8.65514871473117e-06, "loss": 18.417, "step": 14327 }, { "epoch": 0.2619043266857989, "grad_norm": 5.952877435089593, "learning_rate": 8.654946726155931e-06, "loss": 17.1031, "step": 14328 }, { "epoch": 0.2619226058822454, "grad_norm": 6.014144087383838, "learning_rate": 8.65474472477042e-06, "loss": 17.2704, "step": 14329 }, { "epoch": 0.26194088507869195, "grad_norm": 5.634620473152719, "learning_rate": 8.654542710575348e-06, "loss": 17.1087, "step": 14330 }, { "epoch": 0.2619591642751385, "grad_norm": 7.144477716391988, "learning_rate": 8.654340683571418e-06, "loss": 17.6253, "step": 14331 }, { "epoch": 0.26197744347158497, "grad_norm": 6.951082058232852, "learning_rate": 8.654138643759344e-06, "loss": 17.5187, "step": 14332 }, { "epoch": 0.2619957226680315, "grad_norm": 6.539490304534041, "learning_rate": 8.653936591139829e-06, "loss": 17.753, "step": 14333 }, { "epoch": 0.26201400186447804, "grad_norm": 7.091311866726209, "learning_rate": 8.653734525713583e-06, "loss": 17.6396, "step": 14334 }, { "epoch": 0.2620322810609246, "grad_norm": 7.244588545356523, "learning_rate": 8.653532447481316e-06, "loss": 18.1019, "step": 14335 }, { "epoch": 0.2620505602573711, "grad_norm": 6.7605898758674625, "learning_rate": 8.653330356443735e-06, "loss": 18.0615, "step": 14336 }, { "epoch": 0.2620688394538176, "grad_norm": 6.235687289679575, "learning_rate": 8.653128252601547e-06, "loss": 17.3753, "step": 14337 }, { "epoch": 0.26208711865026413, "grad_norm": 6.913880155405421, "learning_rate": 8.652926135955464e-06, "loss": 17.8539, "step": 14338 }, { "epoch": 0.26210539784671066, "grad_norm": 8.72100510318729, "learning_rate": 8.652724006506189e-06, "loss": 18.2457, "step": 14339 }, { "epoch": 0.2621236770431572, "grad_norm": 7.547763899914024, "learning_rate": 8.652521864254435e-06, "loss": 18.258, "step": 14340 }, { "epoch": 0.2621419562396037, "grad_norm": 8.664727161749722, "learning_rate": 8.652319709200907e-06, "loss": 17.8626, "step": 14341 }, { "epoch": 0.2621602354360502, "grad_norm": 6.820000589447856, "learning_rate": 8.652117541346317e-06, "loss": 17.6796, "step": 14342 }, { "epoch": 0.26217851463249675, "grad_norm": 7.484719337981444, "learning_rate": 8.651915360691373e-06, "loss": 17.9818, "step": 14343 }, { "epoch": 0.2621967938289433, "grad_norm": 6.702311338215585, "learning_rate": 8.65171316723678e-06, "loss": 17.9814, "step": 14344 }, { "epoch": 0.2622150730253898, "grad_norm": 6.274173578402506, "learning_rate": 8.651510960983249e-06, "loss": 17.5691, "step": 14345 }, { "epoch": 0.2622333522218363, "grad_norm": 5.154677440741493, "learning_rate": 8.65130874193149e-06, "loss": 17.0477, "step": 14346 }, { "epoch": 0.26225163141828284, "grad_norm": 6.143556865771956, "learning_rate": 8.651106510082211e-06, "loss": 17.2127, "step": 14347 }, { "epoch": 0.2622699106147294, "grad_norm": 6.597435458258558, "learning_rate": 8.65090426543612e-06, "loss": 17.7153, "step": 14348 }, { "epoch": 0.2622881898111759, "grad_norm": 7.095482561854488, "learning_rate": 8.650702007993928e-06, "loss": 17.5848, "step": 14349 }, { "epoch": 0.26230646900762244, "grad_norm": 7.919294559570361, "learning_rate": 8.65049973775634e-06, "loss": 18.287, "step": 14350 }, { "epoch": 0.2623247482040689, "grad_norm": 6.989630058163078, "learning_rate": 8.650297454724069e-06, "loss": 17.9026, "step": 14351 }, { "epoch": 0.26234302740051546, "grad_norm": 7.142012838853306, "learning_rate": 8.65009515889782e-06, "loss": 17.6811, "step": 14352 }, { "epoch": 0.262361306596962, "grad_norm": 7.663580209790349, "learning_rate": 8.649892850278305e-06, "loss": 17.713, "step": 14353 }, { "epoch": 0.26237958579340853, "grad_norm": 6.804175445291541, "learning_rate": 8.649690528866234e-06, "loss": 17.546, "step": 14354 }, { "epoch": 0.26239786498985507, "grad_norm": 5.806853932241845, "learning_rate": 8.649488194662313e-06, "loss": 17.1717, "step": 14355 }, { "epoch": 0.26241614418630155, "grad_norm": 7.266232184784769, "learning_rate": 8.649285847667252e-06, "loss": 17.4944, "step": 14356 }, { "epoch": 0.2624344233827481, "grad_norm": 6.913518241050929, "learning_rate": 8.649083487881762e-06, "loss": 17.7306, "step": 14357 }, { "epoch": 0.2624527025791946, "grad_norm": 7.103733007219354, "learning_rate": 8.64888111530655e-06, "loss": 17.883, "step": 14358 }, { "epoch": 0.26247098177564115, "grad_norm": 7.970926037303044, "learning_rate": 8.648678729942329e-06, "loss": 18.3431, "step": 14359 }, { "epoch": 0.2624892609720877, "grad_norm": 6.871957535751437, "learning_rate": 8.648476331789802e-06, "loss": 17.7505, "step": 14360 }, { "epoch": 0.26250754016853417, "grad_norm": 6.039876635119999, "learning_rate": 8.648273920849684e-06, "loss": 17.4392, "step": 14361 }, { "epoch": 0.2625258193649807, "grad_norm": 7.349633870407833, "learning_rate": 8.648071497122681e-06, "loss": 17.7897, "step": 14362 }, { "epoch": 0.26254409856142724, "grad_norm": 6.617894636394728, "learning_rate": 8.647869060609506e-06, "loss": 17.5581, "step": 14363 }, { "epoch": 0.2625623777578738, "grad_norm": 7.306333841700882, "learning_rate": 8.647666611310865e-06, "loss": 17.6994, "step": 14364 }, { "epoch": 0.2625806569543203, "grad_norm": 7.493017193538997, "learning_rate": 8.647464149227469e-06, "loss": 17.8527, "step": 14365 }, { "epoch": 0.2625989361507668, "grad_norm": 6.956499612622345, "learning_rate": 8.647261674360029e-06, "loss": 17.6242, "step": 14366 }, { "epoch": 0.26261721534721333, "grad_norm": 9.324868903766106, "learning_rate": 8.647059186709252e-06, "loss": 17.9245, "step": 14367 }, { "epoch": 0.26263549454365986, "grad_norm": 6.0025171735272185, "learning_rate": 8.646856686275851e-06, "loss": 17.3891, "step": 14368 }, { "epoch": 0.2626537737401064, "grad_norm": 7.528251366992634, "learning_rate": 8.646654173060531e-06, "loss": 18.0916, "step": 14369 }, { "epoch": 0.26267205293655294, "grad_norm": 7.942889659972795, "learning_rate": 8.646451647064007e-06, "loss": 18.0743, "step": 14370 }, { "epoch": 0.2626903321329994, "grad_norm": 6.4762044959895695, "learning_rate": 8.646249108286985e-06, "loss": 17.4625, "step": 14371 }, { "epoch": 0.26270861132944595, "grad_norm": 6.371259526902738, "learning_rate": 8.646046556730176e-06, "loss": 17.4361, "step": 14372 }, { "epoch": 0.2627268905258925, "grad_norm": 8.00752586799204, "learning_rate": 8.645843992394291e-06, "loss": 18.4024, "step": 14373 }, { "epoch": 0.262745169722339, "grad_norm": 7.283049339422857, "learning_rate": 8.64564141528004e-06, "loss": 17.7196, "step": 14374 }, { "epoch": 0.2627634489187855, "grad_norm": 5.903938845138504, "learning_rate": 8.64543882538813e-06, "loss": 17.3946, "step": 14375 }, { "epoch": 0.26278172811523204, "grad_norm": 6.541995076847163, "learning_rate": 8.645236222719275e-06, "loss": 17.7814, "step": 14376 }, { "epoch": 0.2628000073116786, "grad_norm": 6.989811084200486, "learning_rate": 8.645033607274183e-06, "loss": 17.7339, "step": 14377 }, { "epoch": 0.2628182865081251, "grad_norm": 6.963482701141724, "learning_rate": 8.644830979053565e-06, "loss": 17.754, "step": 14378 }, { "epoch": 0.26283656570457165, "grad_norm": 6.733724655273646, "learning_rate": 8.64462833805813e-06, "loss": 17.6504, "step": 14379 }, { "epoch": 0.2628548449010181, "grad_norm": 6.9312705576119145, "learning_rate": 8.644425684288589e-06, "loss": 17.6847, "step": 14380 }, { "epoch": 0.26287312409746466, "grad_norm": 5.788289883011871, "learning_rate": 8.644223017745653e-06, "loss": 17.2606, "step": 14381 }, { "epoch": 0.2628914032939112, "grad_norm": 6.175323501223622, "learning_rate": 8.644020338430029e-06, "loss": 17.3564, "step": 14382 }, { "epoch": 0.26290968249035773, "grad_norm": 8.410916425732914, "learning_rate": 8.643817646342433e-06, "loss": 18.51, "step": 14383 }, { "epoch": 0.26292796168680427, "grad_norm": 7.237520964244881, "learning_rate": 8.64361494148357e-06, "loss": 17.6957, "step": 14384 }, { "epoch": 0.26294624088325075, "grad_norm": 7.886911415886462, "learning_rate": 8.643412223854154e-06, "loss": 18.1002, "step": 14385 }, { "epoch": 0.2629645200796973, "grad_norm": 6.373927767746825, "learning_rate": 8.643209493454893e-06, "loss": 17.5343, "step": 14386 }, { "epoch": 0.2629827992761438, "grad_norm": 4.982759648815514, "learning_rate": 8.6430067502865e-06, "loss": 16.9397, "step": 14387 }, { "epoch": 0.26300107847259035, "grad_norm": 7.369552463687001, "learning_rate": 8.642803994349686e-06, "loss": 17.7397, "step": 14388 }, { "epoch": 0.2630193576690369, "grad_norm": 6.155446198720482, "learning_rate": 8.642601225645158e-06, "loss": 17.3574, "step": 14389 }, { "epoch": 0.26303763686548337, "grad_norm": 6.270228967039644, "learning_rate": 8.64239844417363e-06, "loss": 17.4627, "step": 14390 }, { "epoch": 0.2630559160619299, "grad_norm": 6.196351432520529, "learning_rate": 8.64219564993581e-06, "loss": 17.2828, "step": 14391 }, { "epoch": 0.26307419525837644, "grad_norm": 6.101617643826063, "learning_rate": 8.641992842932411e-06, "loss": 17.301, "step": 14392 }, { "epoch": 0.263092474454823, "grad_norm": 6.358708273663958, "learning_rate": 8.641790023164146e-06, "loss": 17.3201, "step": 14393 }, { "epoch": 0.2631107536512695, "grad_norm": 6.078181863374826, "learning_rate": 8.641587190631719e-06, "loss": 17.1216, "step": 14394 }, { "epoch": 0.263129032847716, "grad_norm": 6.656127913610376, "learning_rate": 8.641384345335845e-06, "loss": 17.4851, "step": 14395 }, { "epoch": 0.26314731204416253, "grad_norm": 6.16934047646913, "learning_rate": 8.641181487277237e-06, "loss": 17.3669, "step": 14396 }, { "epoch": 0.26316559124060906, "grad_norm": 7.6829119752010495, "learning_rate": 8.640978616456604e-06, "loss": 17.688, "step": 14397 }, { "epoch": 0.2631838704370556, "grad_norm": 7.996643508772909, "learning_rate": 8.640775732874655e-06, "loss": 17.9074, "step": 14398 }, { "epoch": 0.26320214963350214, "grad_norm": 5.867890861865463, "learning_rate": 8.640572836532104e-06, "loss": 17.182, "step": 14399 }, { "epoch": 0.2632204288299486, "grad_norm": 6.574988478382767, "learning_rate": 8.64036992742966e-06, "loss": 17.6623, "step": 14400 }, { "epoch": 0.26323870802639515, "grad_norm": 7.9735831120235074, "learning_rate": 8.640167005568036e-06, "loss": 17.795, "step": 14401 }, { "epoch": 0.2632569872228417, "grad_norm": 6.9479004744618775, "learning_rate": 8.639964070947944e-06, "loss": 17.6584, "step": 14402 }, { "epoch": 0.2632752664192882, "grad_norm": 6.30627480439822, "learning_rate": 8.639761123570093e-06, "loss": 17.3807, "step": 14403 }, { "epoch": 0.26329354561573476, "grad_norm": 6.508813470371485, "learning_rate": 8.639558163435195e-06, "loss": 17.6887, "step": 14404 }, { "epoch": 0.26331182481218124, "grad_norm": 6.034621266277597, "learning_rate": 8.63935519054396e-06, "loss": 17.3219, "step": 14405 }, { "epoch": 0.2633301040086278, "grad_norm": 7.550417473570721, "learning_rate": 8.639152204897103e-06, "loss": 17.4877, "step": 14406 }, { "epoch": 0.2633483832050743, "grad_norm": 5.8813861967069725, "learning_rate": 8.63894920649533e-06, "loss": 17.2511, "step": 14407 }, { "epoch": 0.26336666240152085, "grad_norm": 6.541356247451769, "learning_rate": 8.63874619533936e-06, "loss": 17.7163, "step": 14408 }, { "epoch": 0.2633849415979673, "grad_norm": 6.868842271992227, "learning_rate": 8.638543171429898e-06, "loss": 17.5807, "step": 14409 }, { "epoch": 0.26340322079441386, "grad_norm": 8.45474998240171, "learning_rate": 8.638340134767658e-06, "loss": 17.9626, "step": 14410 }, { "epoch": 0.2634214999908604, "grad_norm": 8.6414741204504, "learning_rate": 8.638137085353353e-06, "loss": 18.3836, "step": 14411 }, { "epoch": 0.26343977918730693, "grad_norm": 8.247130410778336, "learning_rate": 8.63793402318769e-06, "loss": 18.1161, "step": 14412 }, { "epoch": 0.26345805838375347, "grad_norm": 6.67902185171827, "learning_rate": 8.637730948271388e-06, "loss": 17.6532, "step": 14413 }, { "epoch": 0.26347633758019995, "grad_norm": 6.451326450170129, "learning_rate": 8.637527860605153e-06, "loss": 17.7221, "step": 14414 }, { "epoch": 0.2634946167766465, "grad_norm": 7.756004704377866, "learning_rate": 8.637324760189698e-06, "loss": 17.9408, "step": 14415 }, { "epoch": 0.263512895973093, "grad_norm": 7.634821663966823, "learning_rate": 8.637121647025734e-06, "loss": 18.153, "step": 14416 }, { "epoch": 0.26353117516953956, "grad_norm": 7.6466337157005295, "learning_rate": 8.636918521113978e-06, "loss": 18.325, "step": 14417 }, { "epoch": 0.2635494543659861, "grad_norm": 6.582028684780957, "learning_rate": 8.636715382455136e-06, "loss": 17.5692, "step": 14418 }, { "epoch": 0.26356773356243257, "grad_norm": 7.190494107280675, "learning_rate": 8.636512231049921e-06, "loss": 17.8639, "step": 14419 }, { "epoch": 0.2635860127588791, "grad_norm": 6.38178429188656, "learning_rate": 8.636309066899049e-06, "loss": 17.5957, "step": 14420 }, { "epoch": 0.26360429195532564, "grad_norm": 7.037021519255821, "learning_rate": 8.636105890003226e-06, "loss": 17.8983, "step": 14421 }, { "epoch": 0.2636225711517722, "grad_norm": 7.051985242002053, "learning_rate": 8.63590270036317e-06, "loss": 17.7895, "step": 14422 }, { "epoch": 0.2636408503482187, "grad_norm": 8.026578241369034, "learning_rate": 8.635699497979589e-06, "loss": 18.2436, "step": 14423 }, { "epoch": 0.2636591295446652, "grad_norm": 5.57639793423407, "learning_rate": 8.635496282853198e-06, "loss": 17.2686, "step": 14424 }, { "epoch": 0.26367740874111173, "grad_norm": 6.7780316664429385, "learning_rate": 8.635293054984708e-06, "loss": 17.6475, "step": 14425 }, { "epoch": 0.26369568793755827, "grad_norm": 5.959951157596639, "learning_rate": 8.63508981437483e-06, "loss": 17.4411, "step": 14426 }, { "epoch": 0.2637139671340048, "grad_norm": 7.292027672746734, "learning_rate": 8.634886561024278e-06, "loss": 17.8111, "step": 14427 }, { "epoch": 0.26373224633045134, "grad_norm": 6.441026208286518, "learning_rate": 8.634683294933764e-06, "loss": 17.4282, "step": 14428 }, { "epoch": 0.2637505255268978, "grad_norm": 8.410255753178866, "learning_rate": 8.634480016104e-06, "loss": 18.1782, "step": 14429 }, { "epoch": 0.26376880472334435, "grad_norm": 6.094913465751023, "learning_rate": 8.6342767245357e-06, "loss": 17.4715, "step": 14430 }, { "epoch": 0.2637870839197909, "grad_norm": 8.85569749760153, "learning_rate": 8.634073420229576e-06, "loss": 18.1532, "step": 14431 }, { "epoch": 0.2638053631162374, "grad_norm": 5.934754241360311, "learning_rate": 8.63387010318634e-06, "loss": 17.4521, "step": 14432 }, { "epoch": 0.26382364231268396, "grad_norm": 5.7609081418518935, "learning_rate": 8.633666773406703e-06, "loss": 17.3163, "step": 14433 }, { "epoch": 0.26384192150913044, "grad_norm": 7.808406130301736, "learning_rate": 8.63346343089138e-06, "loss": 17.7417, "step": 14434 }, { "epoch": 0.263860200705577, "grad_norm": 7.9984500516837596, "learning_rate": 8.633260075641084e-06, "loss": 17.859, "step": 14435 }, { "epoch": 0.2638784799020235, "grad_norm": 7.958538072619891, "learning_rate": 8.633056707656524e-06, "loss": 17.8181, "step": 14436 }, { "epoch": 0.26389675909847005, "grad_norm": 6.342604782252801, "learning_rate": 8.632853326938417e-06, "loss": 17.5564, "step": 14437 }, { "epoch": 0.2639150382949166, "grad_norm": 6.403760931250704, "learning_rate": 8.632649933487476e-06, "loss": 17.3521, "step": 14438 }, { "epoch": 0.26393331749136306, "grad_norm": 5.973544886025513, "learning_rate": 8.63244652730441e-06, "loss": 17.3415, "step": 14439 }, { "epoch": 0.2639515966878096, "grad_norm": 7.24917701930819, "learning_rate": 8.632243108389935e-06, "loss": 17.8393, "step": 14440 }, { "epoch": 0.26396987588425613, "grad_norm": 6.79616203564851, "learning_rate": 8.632039676744764e-06, "loss": 17.7178, "step": 14441 }, { "epoch": 0.26398815508070267, "grad_norm": 5.588646667658552, "learning_rate": 8.631836232369607e-06, "loss": 17.0608, "step": 14442 }, { "epoch": 0.26400643427714915, "grad_norm": 5.409842674096702, "learning_rate": 8.63163277526518e-06, "loss": 16.9792, "step": 14443 }, { "epoch": 0.2640247134735957, "grad_norm": 8.08944987169309, "learning_rate": 8.631429305432196e-06, "loss": 18.0805, "step": 14444 }, { "epoch": 0.2640429926700422, "grad_norm": 5.948482874138709, "learning_rate": 8.631225822871368e-06, "loss": 17.1899, "step": 14445 }, { "epoch": 0.26406127186648876, "grad_norm": 6.837103304402086, "learning_rate": 8.631022327583407e-06, "loss": 17.5914, "step": 14446 }, { "epoch": 0.2640795510629353, "grad_norm": 5.961297593954145, "learning_rate": 8.630818819569028e-06, "loss": 17.2807, "step": 14447 }, { "epoch": 0.26409783025938177, "grad_norm": 8.932231364435959, "learning_rate": 8.630615298828946e-06, "loss": 18.4577, "step": 14448 }, { "epoch": 0.2641161094558283, "grad_norm": 7.367569768686041, "learning_rate": 8.63041176536387e-06, "loss": 17.9257, "step": 14449 }, { "epoch": 0.26413438865227484, "grad_norm": 7.204328541088552, "learning_rate": 8.630208219174516e-06, "loss": 17.8198, "step": 14450 }, { "epoch": 0.2641526678487214, "grad_norm": 6.21378854137978, "learning_rate": 8.630004660261598e-06, "loss": 17.2751, "step": 14451 }, { "epoch": 0.2641709470451679, "grad_norm": 6.513665379950189, "learning_rate": 8.629801088625829e-06, "loss": 17.2317, "step": 14452 }, { "epoch": 0.2641892262416144, "grad_norm": 7.386715292027365, "learning_rate": 8.629597504267922e-06, "loss": 17.7965, "step": 14453 }, { "epoch": 0.26420750543806093, "grad_norm": 7.460690865408325, "learning_rate": 8.62939390718859e-06, "loss": 17.8174, "step": 14454 }, { "epoch": 0.26422578463450747, "grad_norm": 7.629898885971991, "learning_rate": 8.629190297388548e-06, "loss": 18.054, "step": 14455 }, { "epoch": 0.264244063830954, "grad_norm": 8.84710712682071, "learning_rate": 8.62898667486851e-06, "loss": 17.9528, "step": 14456 }, { "epoch": 0.26426234302740054, "grad_norm": 6.6735793700097235, "learning_rate": 8.628783039629185e-06, "loss": 17.5621, "step": 14457 }, { "epoch": 0.264280622223847, "grad_norm": 6.3392768492060725, "learning_rate": 8.628579391671294e-06, "loss": 17.2754, "step": 14458 }, { "epoch": 0.26429890142029355, "grad_norm": 6.123077425313889, "learning_rate": 8.628375730995546e-06, "loss": 17.5178, "step": 14459 }, { "epoch": 0.2643171806167401, "grad_norm": 6.035249372008523, "learning_rate": 8.628172057602655e-06, "loss": 17.315, "step": 14460 }, { "epoch": 0.2643354598131866, "grad_norm": 6.471941189761329, "learning_rate": 8.627968371493337e-06, "loss": 17.3207, "step": 14461 }, { "epoch": 0.26435373900963316, "grad_norm": 6.960751916740965, "learning_rate": 8.627764672668303e-06, "loss": 17.8303, "step": 14462 }, { "epoch": 0.26437201820607964, "grad_norm": 7.260904605695864, "learning_rate": 8.62756096112827e-06, "loss": 17.9214, "step": 14463 }, { "epoch": 0.2643902974025262, "grad_norm": 7.283451103095669, "learning_rate": 8.62735723687395e-06, "loss": 17.9416, "step": 14464 }, { "epoch": 0.2644085765989727, "grad_norm": 8.309014198787658, "learning_rate": 8.627153499906058e-06, "loss": 18.113, "step": 14465 }, { "epoch": 0.26442685579541925, "grad_norm": 7.576112195195461, "learning_rate": 8.626949750225309e-06, "loss": 18.0264, "step": 14466 }, { "epoch": 0.2644451349918658, "grad_norm": 10.809108033541476, "learning_rate": 8.626745987832415e-06, "loss": 18.7501, "step": 14467 }, { "epoch": 0.26446341418831226, "grad_norm": 7.762386969248576, "learning_rate": 8.626542212728089e-06, "loss": 17.9793, "step": 14468 }, { "epoch": 0.2644816933847588, "grad_norm": 6.39395296041559, "learning_rate": 8.62633842491305e-06, "loss": 17.3432, "step": 14469 }, { "epoch": 0.26449997258120533, "grad_norm": 7.274541419351831, "learning_rate": 8.626134624388008e-06, "loss": 17.459, "step": 14470 }, { "epoch": 0.26451825177765187, "grad_norm": 8.082208749520747, "learning_rate": 8.625930811153679e-06, "loss": 18.1278, "step": 14471 }, { "epoch": 0.2645365309740984, "grad_norm": 7.935183376958134, "learning_rate": 8.625726985210778e-06, "loss": 17.8109, "step": 14472 }, { "epoch": 0.2645548101705449, "grad_norm": 6.301684228265136, "learning_rate": 8.625523146560017e-06, "loss": 17.5847, "step": 14473 }, { "epoch": 0.2645730893669914, "grad_norm": 7.030397305329106, "learning_rate": 8.625319295202113e-06, "loss": 17.9024, "step": 14474 }, { "epoch": 0.26459136856343796, "grad_norm": 6.374695690179215, "learning_rate": 8.625115431137779e-06, "loss": 17.9539, "step": 14475 }, { "epoch": 0.2646096477598845, "grad_norm": 6.083516775600971, "learning_rate": 8.62491155436773e-06, "loss": 17.3094, "step": 14476 }, { "epoch": 0.26462792695633097, "grad_norm": 6.625890657782554, "learning_rate": 8.624707664892683e-06, "loss": 17.8002, "step": 14477 }, { "epoch": 0.2646462061527775, "grad_norm": 5.524092202770874, "learning_rate": 8.624503762713347e-06, "loss": 17.1825, "step": 14478 }, { "epoch": 0.26466448534922404, "grad_norm": 6.2277588844392024, "learning_rate": 8.624299847830441e-06, "loss": 17.3205, "step": 14479 }, { "epoch": 0.2646827645456706, "grad_norm": 5.560057789845338, "learning_rate": 8.624095920244677e-06, "loss": 16.9911, "step": 14480 }, { "epoch": 0.2647010437421171, "grad_norm": 7.52043293493985, "learning_rate": 8.623891979956773e-06, "loss": 18.0707, "step": 14481 }, { "epoch": 0.2647193229385636, "grad_norm": 6.405890877516015, "learning_rate": 8.623688026967443e-06, "loss": 17.3705, "step": 14482 }, { "epoch": 0.26473760213501013, "grad_norm": 5.585600076849905, "learning_rate": 8.623484061277398e-06, "loss": 17.0289, "step": 14483 }, { "epoch": 0.26475588133145667, "grad_norm": 7.335419385052503, "learning_rate": 8.623280082887357e-06, "loss": 18.0216, "step": 14484 }, { "epoch": 0.2647741605279032, "grad_norm": 6.6583291029687715, "learning_rate": 8.623076091798036e-06, "loss": 17.7233, "step": 14485 }, { "epoch": 0.26479243972434974, "grad_norm": 5.613739837826254, "learning_rate": 8.622872088010145e-06, "loss": 17.111, "step": 14486 }, { "epoch": 0.2648107189207962, "grad_norm": 6.1139705866781835, "learning_rate": 8.622668071524403e-06, "loss": 17.5578, "step": 14487 }, { "epoch": 0.26482899811724275, "grad_norm": 8.973784823611156, "learning_rate": 8.622464042341523e-06, "loss": 18.1655, "step": 14488 }, { "epoch": 0.2648472773136893, "grad_norm": 6.036535448526162, "learning_rate": 8.622260000462222e-06, "loss": 17.6154, "step": 14489 }, { "epoch": 0.2648655565101358, "grad_norm": 8.662657752172933, "learning_rate": 8.622055945887213e-06, "loss": 17.9798, "step": 14490 }, { "epoch": 0.26488383570658236, "grad_norm": 6.563700174415634, "learning_rate": 8.621851878617212e-06, "loss": 17.4027, "step": 14491 }, { "epoch": 0.26490211490302884, "grad_norm": 7.4564540130171215, "learning_rate": 8.621647798652935e-06, "loss": 18.0004, "step": 14492 }, { "epoch": 0.2649203940994754, "grad_norm": 6.749344302101685, "learning_rate": 8.621443705995097e-06, "loss": 17.5439, "step": 14493 }, { "epoch": 0.2649386732959219, "grad_norm": 5.882199541576623, "learning_rate": 8.621239600644414e-06, "loss": 17.3339, "step": 14494 }, { "epoch": 0.26495695249236845, "grad_norm": 6.383130482277111, "learning_rate": 8.621035482601599e-06, "loss": 17.7137, "step": 14495 }, { "epoch": 0.264975231688815, "grad_norm": 7.484397129034903, "learning_rate": 8.620831351867368e-06, "loss": 18.2378, "step": 14496 }, { "epoch": 0.26499351088526146, "grad_norm": 5.502308298688108, "learning_rate": 8.620627208442439e-06, "loss": 17.0519, "step": 14497 }, { "epoch": 0.265011790081708, "grad_norm": 6.728858981392801, "learning_rate": 8.620423052327525e-06, "loss": 17.559, "step": 14498 }, { "epoch": 0.26503006927815453, "grad_norm": 6.985670955971041, "learning_rate": 8.620218883523342e-06, "loss": 17.6186, "step": 14499 }, { "epoch": 0.26504834847460107, "grad_norm": 6.758263002532076, "learning_rate": 8.620014702030607e-06, "loss": 17.909, "step": 14500 }, { "epoch": 0.2650666276710476, "grad_norm": 7.034051090086027, "learning_rate": 8.619810507850034e-06, "loss": 18.0054, "step": 14501 }, { "epoch": 0.2650849068674941, "grad_norm": 6.88773969867417, "learning_rate": 8.619606300982339e-06, "loss": 17.8061, "step": 14502 }, { "epoch": 0.2651031860639406, "grad_norm": 7.212959710152913, "learning_rate": 8.619402081428238e-06, "loss": 17.6886, "step": 14503 }, { "epoch": 0.26512146526038716, "grad_norm": 5.972178634538793, "learning_rate": 8.619197849188447e-06, "loss": 17.1257, "step": 14504 }, { "epoch": 0.2651397444568337, "grad_norm": 6.370199677002604, "learning_rate": 8.618993604263683e-06, "loss": 17.6626, "step": 14505 }, { "epoch": 0.26515802365328023, "grad_norm": 7.295011005090328, "learning_rate": 8.618789346654659e-06, "loss": 18.0519, "step": 14506 }, { "epoch": 0.2651763028497267, "grad_norm": 6.9344097714109365, "learning_rate": 8.618585076362092e-06, "loss": 17.8175, "step": 14507 }, { "epoch": 0.26519458204617324, "grad_norm": 6.1592932685006305, "learning_rate": 8.618380793386699e-06, "loss": 17.4501, "step": 14508 }, { "epoch": 0.2652128612426198, "grad_norm": 5.916675937894433, "learning_rate": 8.618176497729197e-06, "loss": 17.4744, "step": 14509 }, { "epoch": 0.2652311404390663, "grad_norm": 7.562135774921811, "learning_rate": 8.617972189390297e-06, "loss": 17.6995, "step": 14510 }, { "epoch": 0.2652494196355128, "grad_norm": 6.258737997475496, "learning_rate": 8.61776786837072e-06, "loss": 17.5063, "step": 14511 }, { "epoch": 0.26526769883195933, "grad_norm": 5.661577547064191, "learning_rate": 8.61756353467118e-06, "loss": 17.2176, "step": 14512 }, { "epoch": 0.26528597802840587, "grad_norm": 7.15404900491835, "learning_rate": 8.617359188292395e-06, "loss": 17.4497, "step": 14513 }, { "epoch": 0.2653042572248524, "grad_norm": 8.59396164793559, "learning_rate": 8.61715482923508e-06, "loss": 18.2819, "step": 14514 }, { "epoch": 0.26532253642129894, "grad_norm": 7.052255945748163, "learning_rate": 8.61695045749995e-06, "loss": 17.7761, "step": 14515 }, { "epoch": 0.2653408156177454, "grad_norm": 6.875029062853168, "learning_rate": 8.616746073087723e-06, "loss": 17.636, "step": 14516 }, { "epoch": 0.26535909481419195, "grad_norm": 6.914797687086098, "learning_rate": 8.616541675999114e-06, "loss": 17.8272, "step": 14517 }, { "epoch": 0.2653773740106385, "grad_norm": 7.397885008124451, "learning_rate": 8.616337266234841e-06, "loss": 18.0082, "step": 14518 }, { "epoch": 0.265395653207085, "grad_norm": 7.338120618208661, "learning_rate": 8.616132843795619e-06, "loss": 17.4697, "step": 14519 }, { "epoch": 0.26541393240353156, "grad_norm": 6.894638124573064, "learning_rate": 8.615928408682167e-06, "loss": 17.6686, "step": 14520 }, { "epoch": 0.26543221159997804, "grad_norm": 6.8820780687649785, "learning_rate": 8.615723960895198e-06, "loss": 17.5165, "step": 14521 }, { "epoch": 0.2654504907964246, "grad_norm": 9.896950902426656, "learning_rate": 8.615519500435432e-06, "loss": 17.7061, "step": 14522 }, { "epoch": 0.2654687699928711, "grad_norm": 6.559777394456265, "learning_rate": 8.615315027303582e-06, "loss": 17.9128, "step": 14523 }, { "epoch": 0.26548704918931765, "grad_norm": 6.956146471135961, "learning_rate": 8.615110541500366e-06, "loss": 17.4917, "step": 14524 }, { "epoch": 0.2655053283857642, "grad_norm": 7.823582414680977, "learning_rate": 8.614906043026503e-06, "loss": 18.1326, "step": 14525 }, { "epoch": 0.26552360758221066, "grad_norm": 7.476708248993483, "learning_rate": 8.614701531882708e-06, "loss": 17.8015, "step": 14526 }, { "epoch": 0.2655418867786572, "grad_norm": 7.054524244592589, "learning_rate": 8.614497008069697e-06, "loss": 17.6633, "step": 14527 }, { "epoch": 0.26556016597510373, "grad_norm": 6.199399833770583, "learning_rate": 8.614292471588188e-06, "loss": 17.1404, "step": 14528 }, { "epoch": 0.26557844517155027, "grad_norm": 5.429267886410322, "learning_rate": 8.614087922438899e-06, "loss": 16.9066, "step": 14529 }, { "epoch": 0.2655967243679968, "grad_norm": 6.745613806224077, "learning_rate": 8.613883360622543e-06, "loss": 17.7029, "step": 14530 }, { "epoch": 0.2656150035644433, "grad_norm": 7.164765055197569, "learning_rate": 8.61367878613984e-06, "loss": 17.398, "step": 14531 }, { "epoch": 0.2656332827608898, "grad_norm": 7.451254490299786, "learning_rate": 8.613474198991508e-06, "loss": 17.8344, "step": 14532 }, { "epoch": 0.26565156195733636, "grad_norm": 6.210960468432399, "learning_rate": 8.61326959917826e-06, "loss": 17.331, "step": 14533 }, { "epoch": 0.2656698411537829, "grad_norm": 6.761316470227943, "learning_rate": 8.613064986700817e-06, "loss": 17.7626, "step": 14534 }, { "epoch": 0.26568812035022943, "grad_norm": 7.566935423976791, "learning_rate": 8.612860361559895e-06, "loss": 17.8178, "step": 14535 }, { "epoch": 0.2657063995466759, "grad_norm": 8.833309756272051, "learning_rate": 8.61265572375621e-06, "loss": 18.8689, "step": 14536 }, { "epoch": 0.26572467874312244, "grad_norm": 6.8978068956033365, "learning_rate": 8.612451073290481e-06, "loss": 17.5856, "step": 14537 }, { "epoch": 0.265742957939569, "grad_norm": 6.444851151410214, "learning_rate": 8.612246410163426e-06, "loss": 17.4258, "step": 14538 }, { "epoch": 0.2657612371360155, "grad_norm": 6.46776566739074, "learning_rate": 8.612041734375758e-06, "loss": 17.569, "step": 14539 }, { "epoch": 0.26577951633246205, "grad_norm": 6.977395888144881, "learning_rate": 8.611837045928199e-06, "loss": 17.8813, "step": 14540 }, { "epoch": 0.26579779552890853, "grad_norm": 6.410358467372472, "learning_rate": 8.611632344821463e-06, "loss": 17.4866, "step": 14541 }, { "epoch": 0.26581607472535507, "grad_norm": 7.950135684060448, "learning_rate": 8.61142763105627e-06, "loss": 18.0721, "step": 14542 }, { "epoch": 0.2658343539218016, "grad_norm": 6.61809402317436, "learning_rate": 8.611222904633336e-06, "loss": 17.6161, "step": 14543 }, { "epoch": 0.26585263311824814, "grad_norm": 7.336321824901097, "learning_rate": 8.61101816555338e-06, "loss": 17.8365, "step": 14544 }, { "epoch": 0.2658709123146946, "grad_norm": 6.737137848233468, "learning_rate": 8.61081341381712e-06, "loss": 17.5722, "step": 14545 }, { "epoch": 0.26588919151114115, "grad_norm": 7.192786767385998, "learning_rate": 8.610608649425269e-06, "loss": 17.6626, "step": 14546 }, { "epoch": 0.2659074707075877, "grad_norm": 6.510261348040814, "learning_rate": 8.61040387237855e-06, "loss": 17.5307, "step": 14547 }, { "epoch": 0.2659257499040342, "grad_norm": 6.578354409706376, "learning_rate": 8.610199082677676e-06, "loss": 17.2704, "step": 14548 }, { "epoch": 0.26594402910048076, "grad_norm": 6.025007721595394, "learning_rate": 8.609994280323371e-06, "loss": 17.1077, "step": 14549 }, { "epoch": 0.26596230829692724, "grad_norm": 5.938124798069925, "learning_rate": 8.609789465316349e-06, "loss": 17.3366, "step": 14550 }, { "epoch": 0.2659805874933738, "grad_norm": 7.9793418290082325, "learning_rate": 8.609584637657327e-06, "loss": 17.2621, "step": 14551 }, { "epoch": 0.2659988666898203, "grad_norm": 7.599806684319761, "learning_rate": 8.609379797347025e-06, "loss": 18.1463, "step": 14552 }, { "epoch": 0.26601714588626685, "grad_norm": 6.192626430350422, "learning_rate": 8.609174944386158e-06, "loss": 17.5365, "step": 14553 }, { "epoch": 0.2660354250827134, "grad_norm": 7.024956925027557, "learning_rate": 8.608970078775448e-06, "loss": 17.8295, "step": 14554 }, { "epoch": 0.26605370427915986, "grad_norm": 7.1205064808478475, "learning_rate": 8.60876520051561e-06, "loss": 17.9813, "step": 14555 }, { "epoch": 0.2660719834756064, "grad_norm": 6.742292643754117, "learning_rate": 8.608560309607364e-06, "loss": 17.7733, "step": 14556 }, { "epoch": 0.26609026267205294, "grad_norm": 6.602844337637779, "learning_rate": 8.608355406051426e-06, "loss": 17.6018, "step": 14557 }, { "epoch": 0.26610854186849947, "grad_norm": 6.674092450200186, "learning_rate": 8.608150489848516e-06, "loss": 17.5333, "step": 14558 }, { "epoch": 0.266126821064946, "grad_norm": 6.330228946805338, "learning_rate": 8.607945560999351e-06, "loss": 17.2829, "step": 14559 }, { "epoch": 0.2661451002613925, "grad_norm": 5.141724277165614, "learning_rate": 8.607740619504651e-06, "loss": 16.9891, "step": 14560 }, { "epoch": 0.266163379457839, "grad_norm": 6.274518320128126, "learning_rate": 8.607535665365133e-06, "loss": 17.0762, "step": 14561 }, { "epoch": 0.26618165865428556, "grad_norm": 7.271786139082388, "learning_rate": 8.607330698581516e-06, "loss": 17.8042, "step": 14562 }, { "epoch": 0.2661999378507321, "grad_norm": 6.7092664226247525, "learning_rate": 8.607125719154517e-06, "loss": 17.6421, "step": 14563 }, { "epoch": 0.26621821704717863, "grad_norm": 7.780958231915097, "learning_rate": 8.606920727084856e-06, "loss": 17.9761, "step": 14564 }, { "epoch": 0.2662364962436251, "grad_norm": 10.250632342629785, "learning_rate": 8.60671572237325e-06, "loss": 18.1369, "step": 14565 }, { "epoch": 0.26625477544007164, "grad_norm": 5.938952848297895, "learning_rate": 8.606510705020418e-06, "loss": 17.3673, "step": 14566 }, { "epoch": 0.2662730546365182, "grad_norm": 6.644777949786896, "learning_rate": 8.606305675027081e-06, "loss": 17.4135, "step": 14567 }, { "epoch": 0.2662913338329647, "grad_norm": 6.9680846421093525, "learning_rate": 8.606100632393955e-06, "loss": 17.6119, "step": 14568 }, { "epoch": 0.26630961302941125, "grad_norm": 7.228445164925551, "learning_rate": 8.605895577121756e-06, "loss": 17.6872, "step": 14569 }, { "epoch": 0.26632789222585773, "grad_norm": 6.793796231481781, "learning_rate": 8.60569050921121e-06, "loss": 17.6463, "step": 14570 }, { "epoch": 0.26634617142230427, "grad_norm": 8.184261291908337, "learning_rate": 8.60548542866303e-06, "loss": 18.0862, "step": 14571 }, { "epoch": 0.2663644506187508, "grad_norm": 6.669093698274262, "learning_rate": 8.605280335477937e-06, "loss": 17.3625, "step": 14572 }, { "epoch": 0.26638272981519734, "grad_norm": 8.022540232457578, "learning_rate": 8.60507522965665e-06, "loss": 17.8668, "step": 14573 }, { "epoch": 0.2664010090116439, "grad_norm": 6.359887614228244, "learning_rate": 8.604870111199884e-06, "loss": 17.3532, "step": 14574 }, { "epoch": 0.26641928820809035, "grad_norm": 6.00556214434223, "learning_rate": 8.604664980108363e-06, "loss": 17.2011, "step": 14575 }, { "epoch": 0.2664375674045369, "grad_norm": 8.240097576013113, "learning_rate": 8.604459836382805e-06, "loss": 18.0064, "step": 14576 }, { "epoch": 0.2664558466009834, "grad_norm": 7.621136000245503, "learning_rate": 8.604254680023926e-06, "loss": 18.0655, "step": 14577 }, { "epoch": 0.26647412579742996, "grad_norm": 7.241932252630637, "learning_rate": 8.604049511032448e-06, "loss": 17.7946, "step": 14578 }, { "epoch": 0.26649240499387644, "grad_norm": 7.38691990934202, "learning_rate": 8.60384432940909e-06, "loss": 18.3702, "step": 14579 }, { "epoch": 0.266510684190323, "grad_norm": 5.840428221561425, "learning_rate": 8.603639135154571e-06, "loss": 17.2816, "step": 14580 }, { "epoch": 0.2665289633867695, "grad_norm": 5.866197550155619, "learning_rate": 8.603433928269607e-06, "loss": 17.1281, "step": 14581 }, { "epoch": 0.26654724258321605, "grad_norm": 5.739027440440411, "learning_rate": 8.603228708754923e-06, "loss": 17.2506, "step": 14582 }, { "epoch": 0.2665655217796626, "grad_norm": 7.235600044854483, "learning_rate": 8.603023476611231e-06, "loss": 18.0437, "step": 14583 }, { "epoch": 0.26658380097610906, "grad_norm": 5.410088168252828, "learning_rate": 8.602818231839258e-06, "loss": 17.0568, "step": 14584 }, { "epoch": 0.2666020801725556, "grad_norm": 5.259849600457909, "learning_rate": 8.602612974439719e-06, "loss": 17.0523, "step": 14585 }, { "epoch": 0.26662035936900214, "grad_norm": 7.771111353775161, "learning_rate": 8.602407704413333e-06, "loss": 18.318, "step": 14586 }, { "epoch": 0.26663863856544867, "grad_norm": 6.016256629883184, "learning_rate": 8.602202421760821e-06, "loss": 17.3069, "step": 14587 }, { "epoch": 0.2666569177618952, "grad_norm": 8.535257209161756, "learning_rate": 8.601997126482902e-06, "loss": 18.725, "step": 14588 }, { "epoch": 0.2666751969583417, "grad_norm": 6.675129458470847, "learning_rate": 8.601791818580296e-06, "loss": 17.8127, "step": 14589 }, { "epoch": 0.2666934761547882, "grad_norm": 7.876609683187753, "learning_rate": 8.60158649805372e-06, "loss": 18.1652, "step": 14590 }, { "epoch": 0.26671175535123476, "grad_norm": 17.921230096264836, "learning_rate": 8.6013811649039e-06, "loss": 17.9632, "step": 14591 }, { "epoch": 0.2667300345476813, "grad_norm": 6.598397901157439, "learning_rate": 8.601175819131548e-06, "loss": 17.505, "step": 14592 }, { "epoch": 0.26674831374412783, "grad_norm": 7.095345991456853, "learning_rate": 8.60097046073739e-06, "loss": 18.1829, "step": 14593 }, { "epoch": 0.2667665929405743, "grad_norm": 9.159927122606996, "learning_rate": 8.600765089722141e-06, "loss": 17.9415, "step": 14594 }, { "epoch": 0.26678487213702085, "grad_norm": 6.433372609963939, "learning_rate": 8.600559706086524e-06, "loss": 17.3582, "step": 14595 }, { "epoch": 0.2668031513334674, "grad_norm": 6.477033924801939, "learning_rate": 8.600354309831256e-06, "loss": 17.3693, "step": 14596 }, { "epoch": 0.2668214305299139, "grad_norm": 7.83575756565574, "learning_rate": 8.60014890095706e-06, "loss": 18.4398, "step": 14597 }, { "epoch": 0.26683970972636045, "grad_norm": 7.317859995714428, "learning_rate": 8.599943479464655e-06, "loss": 17.9391, "step": 14598 }, { "epoch": 0.26685798892280693, "grad_norm": 6.995436993292523, "learning_rate": 8.59973804535476e-06, "loss": 17.8577, "step": 14599 }, { "epoch": 0.26687626811925347, "grad_norm": 5.771502054950156, "learning_rate": 8.599532598628095e-06, "loss": 17.3081, "step": 14600 }, { "epoch": 0.2668945473157, "grad_norm": 6.9322850295424105, "learning_rate": 8.599327139285381e-06, "loss": 17.506, "step": 14601 }, { "epoch": 0.26691282651214654, "grad_norm": 7.608625176015776, "learning_rate": 8.599121667327336e-06, "loss": 17.9819, "step": 14602 }, { "epoch": 0.2669311057085931, "grad_norm": 6.300309634688053, "learning_rate": 8.598916182754684e-06, "loss": 17.4234, "step": 14603 }, { "epoch": 0.26694938490503956, "grad_norm": 6.457807239142948, "learning_rate": 8.598710685568144e-06, "loss": 17.3832, "step": 14604 }, { "epoch": 0.2669676641014861, "grad_norm": 6.317312787942724, "learning_rate": 8.598505175768433e-06, "loss": 17.5229, "step": 14605 }, { "epoch": 0.2669859432979326, "grad_norm": 6.8525647886978245, "learning_rate": 8.598299653356275e-06, "loss": 17.914, "step": 14606 }, { "epoch": 0.26700422249437916, "grad_norm": 6.08648184309682, "learning_rate": 8.598094118332387e-06, "loss": 17.3699, "step": 14607 }, { "epoch": 0.2670225016908257, "grad_norm": 7.168418025498164, "learning_rate": 8.597888570697493e-06, "loss": 17.8875, "step": 14608 }, { "epoch": 0.2670407808872722, "grad_norm": 6.978072260586743, "learning_rate": 8.597683010452313e-06, "loss": 17.8669, "step": 14609 }, { "epoch": 0.2670590600837187, "grad_norm": 5.68928948623838, "learning_rate": 8.597477437597564e-06, "loss": 17.4055, "step": 14610 }, { "epoch": 0.26707733928016525, "grad_norm": 6.7831148775858034, "learning_rate": 8.59727185213397e-06, "loss": 17.8555, "step": 14611 }, { "epoch": 0.2670956184766118, "grad_norm": 6.9694905142115395, "learning_rate": 8.59706625406225e-06, "loss": 17.8434, "step": 14612 }, { "epoch": 0.26711389767305826, "grad_norm": 6.878696532269876, "learning_rate": 8.596860643383124e-06, "loss": 17.6899, "step": 14613 }, { "epoch": 0.2671321768695048, "grad_norm": 5.971757207828341, "learning_rate": 8.596655020097314e-06, "loss": 17.3628, "step": 14614 }, { "epoch": 0.26715045606595134, "grad_norm": 5.842889536628196, "learning_rate": 8.596449384205541e-06, "loss": 17.2973, "step": 14615 }, { "epoch": 0.26716873526239787, "grad_norm": 7.238832715010462, "learning_rate": 8.596243735708525e-06, "loss": 17.9668, "step": 14616 }, { "epoch": 0.2671870144588444, "grad_norm": 7.1924676079170276, "learning_rate": 8.596038074606987e-06, "loss": 17.6888, "step": 14617 }, { "epoch": 0.2672052936552909, "grad_norm": 6.889844749206356, "learning_rate": 8.595832400901645e-06, "loss": 17.5642, "step": 14618 }, { "epoch": 0.2672235728517374, "grad_norm": 7.144385758465032, "learning_rate": 8.595626714593226e-06, "loss": 17.5755, "step": 14619 }, { "epoch": 0.26724185204818396, "grad_norm": 7.062425882232001, "learning_rate": 8.595421015682446e-06, "loss": 17.8704, "step": 14620 }, { "epoch": 0.2672601312446305, "grad_norm": 6.977461440582134, "learning_rate": 8.595215304170026e-06, "loss": 17.7499, "step": 14621 }, { "epoch": 0.26727841044107703, "grad_norm": 7.886577020516829, "learning_rate": 8.59500958005669e-06, "loss": 17.8037, "step": 14622 }, { "epoch": 0.2672966896375235, "grad_norm": 6.188005188647903, "learning_rate": 8.594803843343158e-06, "loss": 17.3182, "step": 14623 }, { "epoch": 0.26731496883397005, "grad_norm": 7.089846529298709, "learning_rate": 8.59459809403015e-06, "loss": 17.7172, "step": 14624 }, { "epoch": 0.2673332480304166, "grad_norm": 8.26504148083267, "learning_rate": 8.594392332118385e-06, "loss": 18.5234, "step": 14625 }, { "epoch": 0.2673515272268631, "grad_norm": 6.886041654859538, "learning_rate": 8.594186557608587e-06, "loss": 17.6315, "step": 14626 }, { "epoch": 0.26736980642330965, "grad_norm": 8.288044682399347, "learning_rate": 8.59398077050148e-06, "loss": 17.97, "step": 14627 }, { "epoch": 0.26738808561975613, "grad_norm": 7.169456483789491, "learning_rate": 8.59377497079778e-06, "loss": 18.0894, "step": 14628 }, { "epoch": 0.26740636481620267, "grad_norm": 5.904160673266783, "learning_rate": 8.593569158498213e-06, "loss": 17.3124, "step": 14629 }, { "epoch": 0.2674246440126492, "grad_norm": 6.192390315062664, "learning_rate": 8.593363333603498e-06, "loss": 17.4309, "step": 14630 }, { "epoch": 0.26744292320909574, "grad_norm": 6.815067021765786, "learning_rate": 8.593157496114353e-06, "loss": 17.8669, "step": 14631 }, { "epoch": 0.2674612024055423, "grad_norm": 9.663353977931491, "learning_rate": 8.592951646031504e-06, "loss": 18.0593, "step": 14632 }, { "epoch": 0.26747948160198876, "grad_norm": 7.027431175448057, "learning_rate": 8.592745783355673e-06, "loss": 17.511, "step": 14633 }, { "epoch": 0.2674977607984353, "grad_norm": 7.165705104412249, "learning_rate": 8.592539908087578e-06, "loss": 17.9487, "step": 14634 }, { "epoch": 0.2675160399948818, "grad_norm": 7.1448749636832725, "learning_rate": 8.592334020227943e-06, "loss": 17.998, "step": 14635 }, { "epoch": 0.26753431919132836, "grad_norm": 7.8146278493319, "learning_rate": 8.59212811977749e-06, "loss": 17.9695, "step": 14636 }, { "epoch": 0.2675525983877749, "grad_norm": 5.7202952675042384, "learning_rate": 8.591922206736937e-06, "loss": 17.4506, "step": 14637 }, { "epoch": 0.2675708775842214, "grad_norm": 6.279828032073376, "learning_rate": 8.591716281107013e-06, "loss": 17.7869, "step": 14638 }, { "epoch": 0.2675891567806679, "grad_norm": 5.402484171696131, "learning_rate": 8.59151034288843e-06, "loss": 17.0872, "step": 14639 }, { "epoch": 0.26760743597711445, "grad_norm": 6.172026518418237, "learning_rate": 8.591304392081917e-06, "loss": 17.471, "step": 14640 }, { "epoch": 0.267625715173561, "grad_norm": 6.823439932628753, "learning_rate": 8.591098428688194e-06, "loss": 18.1081, "step": 14641 }, { "epoch": 0.2676439943700075, "grad_norm": 6.5398142477364924, "learning_rate": 8.590892452707983e-06, "loss": 17.6611, "step": 14642 }, { "epoch": 0.267662273566454, "grad_norm": 5.766165614977785, "learning_rate": 8.590686464142006e-06, "loss": 17.4685, "step": 14643 }, { "epoch": 0.26768055276290054, "grad_norm": 6.625144022004418, "learning_rate": 8.590480462990983e-06, "loss": 18.028, "step": 14644 }, { "epoch": 0.26769883195934707, "grad_norm": 6.958001094920115, "learning_rate": 8.590274449255638e-06, "loss": 17.6994, "step": 14645 }, { "epoch": 0.2677171111557936, "grad_norm": 6.206127260729637, "learning_rate": 8.590068422936693e-06, "loss": 17.5663, "step": 14646 }, { "epoch": 0.2677353903522401, "grad_norm": 5.524311365369359, "learning_rate": 8.589862384034869e-06, "loss": 17.2548, "step": 14647 }, { "epoch": 0.2677536695486866, "grad_norm": 6.794287230566698, "learning_rate": 8.589656332550888e-06, "loss": 17.6116, "step": 14648 }, { "epoch": 0.26777194874513316, "grad_norm": 6.011601703938022, "learning_rate": 8.589450268485475e-06, "loss": 17.4124, "step": 14649 }, { "epoch": 0.2677902279415797, "grad_norm": 7.3348265994159, "learning_rate": 8.58924419183935e-06, "loss": 18.0039, "step": 14650 }, { "epoch": 0.26780850713802623, "grad_norm": 6.469191733093986, "learning_rate": 8.589038102613235e-06, "loss": 17.4543, "step": 14651 }, { "epoch": 0.2678267863344727, "grad_norm": 6.734930912931436, "learning_rate": 8.588832000807852e-06, "loss": 18.0038, "step": 14652 }, { "epoch": 0.26784506553091925, "grad_norm": 6.841974117358319, "learning_rate": 8.588625886423926e-06, "loss": 17.8994, "step": 14653 }, { "epoch": 0.2678633447273658, "grad_norm": 6.354695599013013, "learning_rate": 8.588419759462176e-06, "loss": 17.1604, "step": 14654 }, { "epoch": 0.2678816239238123, "grad_norm": 6.437731998455454, "learning_rate": 8.588213619923328e-06, "loss": 17.7682, "step": 14655 }, { "epoch": 0.26789990312025885, "grad_norm": 6.279993746879131, "learning_rate": 8.588007467808101e-06, "loss": 17.2485, "step": 14656 }, { "epoch": 0.26791818231670533, "grad_norm": 8.061355332527341, "learning_rate": 8.58780130311722e-06, "loss": 18.0273, "step": 14657 }, { "epoch": 0.26793646151315187, "grad_norm": 5.96166967807284, "learning_rate": 8.587595125851407e-06, "loss": 17.2501, "step": 14658 }, { "epoch": 0.2679547407095984, "grad_norm": 7.3753159021860055, "learning_rate": 8.587388936011386e-06, "loss": 18.1227, "step": 14659 }, { "epoch": 0.26797301990604494, "grad_norm": 6.456202514061895, "learning_rate": 8.587182733597874e-06, "loss": 17.478, "step": 14660 }, { "epoch": 0.2679912991024915, "grad_norm": 6.459535256272963, "learning_rate": 8.5869765186116e-06, "loss": 17.5381, "step": 14661 }, { "epoch": 0.26800957829893796, "grad_norm": 5.474720377856238, "learning_rate": 8.586770291053286e-06, "loss": 17.1773, "step": 14662 }, { "epoch": 0.2680278574953845, "grad_norm": 6.648757690869516, "learning_rate": 8.586564050923651e-06, "loss": 17.7136, "step": 14663 }, { "epoch": 0.268046136691831, "grad_norm": 6.966701301159261, "learning_rate": 8.586357798223421e-06, "loss": 17.5491, "step": 14664 }, { "epoch": 0.26806441588827756, "grad_norm": 8.349272246970072, "learning_rate": 8.586151532953319e-06, "loss": 18.0761, "step": 14665 }, { "epoch": 0.2680826950847241, "grad_norm": 8.472676963353326, "learning_rate": 8.585945255114065e-06, "loss": 18.4994, "step": 14666 }, { "epoch": 0.2681009742811706, "grad_norm": 6.414726136351401, "learning_rate": 8.585738964706387e-06, "loss": 17.297, "step": 14667 }, { "epoch": 0.2681192534776171, "grad_norm": 6.668931944589009, "learning_rate": 8.585532661731002e-06, "loss": 17.6723, "step": 14668 }, { "epoch": 0.26813753267406365, "grad_norm": 6.954940343528749, "learning_rate": 8.585326346188639e-06, "loss": 17.718, "step": 14669 }, { "epoch": 0.2681558118705102, "grad_norm": 5.9340071981898035, "learning_rate": 8.585120018080016e-06, "loss": 17.4597, "step": 14670 }, { "epoch": 0.2681740910669567, "grad_norm": 7.326870791647553, "learning_rate": 8.58491367740586e-06, "loss": 18.0068, "step": 14671 }, { "epoch": 0.2681923702634032, "grad_norm": 7.827022488528485, "learning_rate": 8.584707324166892e-06, "loss": 17.7907, "step": 14672 }, { "epoch": 0.26821064945984974, "grad_norm": 10.011623012769867, "learning_rate": 8.584500958363835e-06, "loss": 17.5316, "step": 14673 }, { "epoch": 0.2682289286562963, "grad_norm": 6.798564723673695, "learning_rate": 8.584294579997414e-06, "loss": 17.4025, "step": 14674 }, { "epoch": 0.2682472078527428, "grad_norm": 7.56727091434625, "learning_rate": 8.584088189068352e-06, "loss": 17.8541, "step": 14675 }, { "epoch": 0.26826548704918934, "grad_norm": 7.037791027692974, "learning_rate": 8.583881785577372e-06, "loss": 17.7043, "step": 14676 }, { "epoch": 0.2682837662456358, "grad_norm": 7.1995554718453185, "learning_rate": 8.583675369525196e-06, "loss": 17.6086, "step": 14677 }, { "epoch": 0.26830204544208236, "grad_norm": 7.771830931547811, "learning_rate": 8.583468940912549e-06, "loss": 18.2094, "step": 14678 }, { "epoch": 0.2683203246385289, "grad_norm": 6.473819895791835, "learning_rate": 8.583262499740156e-06, "loss": 17.6683, "step": 14679 }, { "epoch": 0.26833860383497543, "grad_norm": 5.336545610883745, "learning_rate": 8.583056046008738e-06, "loss": 17.047, "step": 14680 }, { "epoch": 0.2683568830314219, "grad_norm": 6.476933158971933, "learning_rate": 8.58284957971902e-06, "loss": 17.7432, "step": 14681 }, { "epoch": 0.26837516222786845, "grad_norm": 7.194844716455216, "learning_rate": 8.582643100871723e-06, "loss": 18.0126, "step": 14682 }, { "epoch": 0.268393441424315, "grad_norm": 9.837567698985303, "learning_rate": 8.582436609467574e-06, "loss": 18.3976, "step": 14683 }, { "epoch": 0.2684117206207615, "grad_norm": 6.302737098745762, "learning_rate": 8.582230105507297e-06, "loss": 17.3444, "step": 14684 }, { "epoch": 0.26842999981720805, "grad_norm": 6.031561994737572, "learning_rate": 8.582023588991612e-06, "loss": 17.243, "step": 14685 }, { "epoch": 0.26844827901365453, "grad_norm": 6.7063610725854135, "learning_rate": 8.581817059921246e-06, "loss": 17.7287, "step": 14686 }, { "epoch": 0.26846655821010107, "grad_norm": 5.076633321812692, "learning_rate": 8.581610518296923e-06, "loss": 17.0169, "step": 14687 }, { "epoch": 0.2684848374065476, "grad_norm": 7.5560594426511285, "learning_rate": 8.581403964119366e-06, "loss": 18.364, "step": 14688 }, { "epoch": 0.26850311660299414, "grad_norm": 6.224973921589403, "learning_rate": 8.581197397389299e-06, "loss": 17.4573, "step": 14689 }, { "epoch": 0.2685213957994407, "grad_norm": 7.531539240589316, "learning_rate": 8.580990818107444e-06, "loss": 18.3858, "step": 14690 }, { "epoch": 0.26853967499588716, "grad_norm": 8.32563910370348, "learning_rate": 8.580784226274527e-06, "loss": 18.243, "step": 14691 }, { "epoch": 0.2685579541923337, "grad_norm": 6.877877813170907, "learning_rate": 8.580577621891274e-06, "loss": 17.7593, "step": 14692 }, { "epoch": 0.26857623338878023, "grad_norm": 5.580909439182357, "learning_rate": 8.580371004958406e-06, "loss": 17.2123, "step": 14693 }, { "epoch": 0.26859451258522676, "grad_norm": 7.284307503234026, "learning_rate": 8.580164375476649e-06, "loss": 17.8056, "step": 14694 }, { "epoch": 0.2686127917816733, "grad_norm": 6.045374886150081, "learning_rate": 8.579957733446725e-06, "loss": 17.376, "step": 14695 }, { "epoch": 0.2686310709781198, "grad_norm": 5.784900666939494, "learning_rate": 8.57975107886936e-06, "loss": 16.9846, "step": 14696 }, { "epoch": 0.2686493501745663, "grad_norm": 5.744827589367595, "learning_rate": 8.579544411745279e-06, "loss": 17.1729, "step": 14697 }, { "epoch": 0.26866762937101285, "grad_norm": 6.333080325574875, "learning_rate": 8.579337732075207e-06, "loss": 17.565, "step": 14698 }, { "epoch": 0.2686859085674594, "grad_norm": 7.097610393011616, "learning_rate": 8.579131039859865e-06, "loss": 18.1011, "step": 14699 }, { "epoch": 0.2687041877639059, "grad_norm": 6.688524342749437, "learning_rate": 8.578924335099979e-06, "loss": 17.5679, "step": 14700 }, { "epoch": 0.2687224669603524, "grad_norm": 7.050501574071304, "learning_rate": 8.578717617796275e-06, "loss": 17.8998, "step": 14701 }, { "epoch": 0.26874074615679894, "grad_norm": 6.523816221993048, "learning_rate": 8.578510887949475e-06, "loss": 17.5661, "step": 14702 }, { "epoch": 0.2687590253532455, "grad_norm": 7.428933579013749, "learning_rate": 8.578304145560306e-06, "loss": 17.7928, "step": 14703 }, { "epoch": 0.268777304549692, "grad_norm": 6.858388721237472, "learning_rate": 8.578097390629491e-06, "loss": 17.6887, "step": 14704 }, { "epoch": 0.26879558374613854, "grad_norm": 6.412739863494405, "learning_rate": 8.577890623157754e-06, "loss": 17.45, "step": 14705 }, { "epoch": 0.268813862942585, "grad_norm": 6.470617535701904, "learning_rate": 8.577683843145823e-06, "loss": 17.5752, "step": 14706 }, { "epoch": 0.26883214213903156, "grad_norm": 6.746735318473387, "learning_rate": 8.577477050594419e-06, "loss": 17.7946, "step": 14707 }, { "epoch": 0.2688504213354781, "grad_norm": 5.971641974763012, "learning_rate": 8.57727024550427e-06, "loss": 17.4674, "step": 14708 }, { "epoch": 0.26886870053192463, "grad_norm": 6.175575672423403, "learning_rate": 8.577063427876096e-06, "loss": 17.384, "step": 14709 }, { "epoch": 0.26888697972837117, "grad_norm": 7.383734868781345, "learning_rate": 8.576856597710628e-06, "loss": 17.8635, "step": 14710 }, { "epoch": 0.26890525892481765, "grad_norm": 12.295733493486797, "learning_rate": 8.576649755008587e-06, "loss": 18.6965, "step": 14711 }, { "epoch": 0.2689235381212642, "grad_norm": 6.466883236775337, "learning_rate": 8.576442899770699e-06, "loss": 17.5949, "step": 14712 }, { "epoch": 0.2689418173177107, "grad_norm": 7.800539756039745, "learning_rate": 8.576236031997689e-06, "loss": 17.9315, "step": 14713 }, { "epoch": 0.26896009651415725, "grad_norm": 6.894007335467035, "learning_rate": 8.576029151690282e-06, "loss": 17.6541, "step": 14714 }, { "epoch": 0.26897837571060373, "grad_norm": 6.148859316110533, "learning_rate": 8.575822258849203e-06, "loss": 17.4111, "step": 14715 }, { "epoch": 0.26899665490705027, "grad_norm": 6.58256675453406, "learning_rate": 8.575615353475178e-06, "loss": 17.6077, "step": 14716 }, { "epoch": 0.2690149341034968, "grad_norm": 7.441271575092082, "learning_rate": 8.57540843556893e-06, "loss": 17.9539, "step": 14717 }, { "epoch": 0.26903321329994334, "grad_norm": 5.361833844903456, "learning_rate": 8.575201505131188e-06, "loss": 16.9696, "step": 14718 }, { "epoch": 0.2690514924963899, "grad_norm": 5.678366120117593, "learning_rate": 8.574994562162672e-06, "loss": 17.3139, "step": 14719 }, { "epoch": 0.26906977169283636, "grad_norm": 6.0893837231018715, "learning_rate": 8.574787606664115e-06, "loss": 17.4303, "step": 14720 }, { "epoch": 0.2690880508892829, "grad_norm": 6.978650869235168, "learning_rate": 8.574580638636233e-06, "loss": 17.7617, "step": 14721 }, { "epoch": 0.26910633008572943, "grad_norm": 6.399336943829046, "learning_rate": 8.574373658079758e-06, "loss": 17.4543, "step": 14722 }, { "epoch": 0.26912460928217596, "grad_norm": 6.7738837958731795, "learning_rate": 8.574166664995413e-06, "loss": 17.5596, "step": 14723 }, { "epoch": 0.2691428884786225, "grad_norm": 6.833367457168881, "learning_rate": 8.573959659383923e-06, "loss": 17.2438, "step": 14724 }, { "epoch": 0.269161167675069, "grad_norm": 6.853494356915995, "learning_rate": 8.573752641246018e-06, "loss": 17.9752, "step": 14725 }, { "epoch": 0.2691794468715155, "grad_norm": 6.813532429082994, "learning_rate": 8.573545610582416e-06, "loss": 17.7973, "step": 14726 }, { "epoch": 0.26919772606796205, "grad_norm": 7.404253275459462, "learning_rate": 8.57333856739385e-06, "loss": 18.1289, "step": 14727 }, { "epoch": 0.2692160052644086, "grad_norm": 5.67083780852939, "learning_rate": 8.57313151168104e-06, "loss": 17.1733, "step": 14728 }, { "epoch": 0.2692342844608551, "grad_norm": 6.618964024735214, "learning_rate": 8.572924443444714e-06, "loss": 17.6785, "step": 14729 }, { "epoch": 0.2692525636573016, "grad_norm": 4.780337707127116, "learning_rate": 8.572717362685601e-06, "loss": 16.988, "step": 14730 }, { "epoch": 0.26927084285374814, "grad_norm": 6.151126655675175, "learning_rate": 8.57251026940442e-06, "loss": 17.3929, "step": 14731 }, { "epoch": 0.2692891220501947, "grad_norm": 6.235799780952094, "learning_rate": 8.572303163601902e-06, "loss": 17.585, "step": 14732 }, { "epoch": 0.2693074012466412, "grad_norm": 5.437297085695997, "learning_rate": 8.572096045278773e-06, "loss": 17.1169, "step": 14733 }, { "epoch": 0.26932568044308774, "grad_norm": 7.3332844311428484, "learning_rate": 8.571888914435756e-06, "loss": 17.8667, "step": 14734 }, { "epoch": 0.2693439596395342, "grad_norm": 6.5775330738577305, "learning_rate": 8.571681771073576e-06, "loss": 17.3096, "step": 14735 }, { "epoch": 0.26936223883598076, "grad_norm": 6.474062535106741, "learning_rate": 8.571474615192965e-06, "loss": 17.3985, "step": 14736 }, { "epoch": 0.2693805180324273, "grad_norm": 6.667701680170781, "learning_rate": 8.571267446794643e-06, "loss": 17.4353, "step": 14737 }, { "epoch": 0.26939879722887383, "grad_norm": 6.9320060803068, "learning_rate": 8.57106026587934e-06, "loss": 17.827, "step": 14738 }, { "epoch": 0.26941707642532037, "grad_norm": 6.716519546581107, "learning_rate": 8.57085307244778e-06, "loss": 17.6117, "step": 14739 }, { "epoch": 0.26943535562176685, "grad_norm": 6.790569901350317, "learning_rate": 8.57064586650069e-06, "loss": 17.2713, "step": 14740 }, { "epoch": 0.2694536348182134, "grad_norm": 7.202666511234887, "learning_rate": 8.570438648038795e-06, "loss": 17.6958, "step": 14741 }, { "epoch": 0.2694719140146599, "grad_norm": 6.375522015279112, "learning_rate": 8.570231417062824e-06, "loss": 17.4966, "step": 14742 }, { "epoch": 0.26949019321110645, "grad_norm": 6.551655264635537, "learning_rate": 8.570024173573502e-06, "loss": 17.4394, "step": 14743 }, { "epoch": 0.269508472407553, "grad_norm": 7.195717947624498, "learning_rate": 8.569816917571553e-06, "loss": 17.6685, "step": 14744 }, { "epoch": 0.26952675160399947, "grad_norm": 5.360594593921079, "learning_rate": 8.569609649057706e-06, "loss": 16.7244, "step": 14745 }, { "epoch": 0.269545030800446, "grad_norm": 6.824536535650638, "learning_rate": 8.569402368032688e-06, "loss": 17.8826, "step": 14746 }, { "epoch": 0.26956330999689254, "grad_norm": 7.403124834009568, "learning_rate": 8.569195074497224e-06, "loss": 17.7747, "step": 14747 }, { "epoch": 0.2695815891933391, "grad_norm": 7.319336075058227, "learning_rate": 8.56898776845204e-06, "loss": 17.659, "step": 14748 }, { "epoch": 0.26959986838978556, "grad_norm": 5.614063931073716, "learning_rate": 8.568780449897866e-06, "loss": 17.0689, "step": 14749 }, { "epoch": 0.2696181475862321, "grad_norm": 6.691994166706363, "learning_rate": 8.568573118835423e-06, "loss": 17.5283, "step": 14750 }, { "epoch": 0.26963642678267863, "grad_norm": 8.027288059140304, "learning_rate": 8.568365775265443e-06, "loss": 17.7729, "step": 14751 }, { "epoch": 0.26965470597912516, "grad_norm": 5.625788602106047, "learning_rate": 8.56815841918865e-06, "loss": 17.3177, "step": 14752 }, { "epoch": 0.2696729851755717, "grad_norm": 6.418223505014459, "learning_rate": 8.567951050605771e-06, "loss": 17.3564, "step": 14753 }, { "epoch": 0.2696912643720182, "grad_norm": 6.191659165789929, "learning_rate": 8.567743669517534e-06, "loss": 17.565, "step": 14754 }, { "epoch": 0.2697095435684647, "grad_norm": 6.952676833930705, "learning_rate": 8.567536275924666e-06, "loss": 17.789, "step": 14755 }, { "epoch": 0.26972782276491125, "grad_norm": 6.728365928300831, "learning_rate": 8.567328869827891e-06, "loss": 17.4448, "step": 14756 }, { "epoch": 0.2697461019613578, "grad_norm": 6.9397302438025665, "learning_rate": 8.56712145122794e-06, "loss": 17.8716, "step": 14757 }, { "epoch": 0.2697643811578043, "grad_norm": 6.421020196663757, "learning_rate": 8.566914020125536e-06, "loss": 17.47, "step": 14758 }, { "epoch": 0.2697826603542508, "grad_norm": 6.311965000870438, "learning_rate": 8.566706576521407e-06, "loss": 17.5558, "step": 14759 }, { "epoch": 0.26980093955069734, "grad_norm": 6.596533882459412, "learning_rate": 8.566499120416283e-06, "loss": 17.5017, "step": 14760 }, { "epoch": 0.2698192187471439, "grad_norm": 6.058452312693287, "learning_rate": 8.566291651810887e-06, "loss": 16.9859, "step": 14761 }, { "epoch": 0.2698374979435904, "grad_norm": 8.718469231759896, "learning_rate": 8.56608417070595e-06, "loss": 18.3771, "step": 14762 }, { "epoch": 0.26985577714003695, "grad_norm": 5.622414403765055, "learning_rate": 8.565876677102198e-06, "loss": 17.2049, "step": 14763 }, { "epoch": 0.2698740563364834, "grad_norm": 6.467884336016666, "learning_rate": 8.565669171000357e-06, "loss": 17.454, "step": 14764 }, { "epoch": 0.26989233553292996, "grad_norm": 6.698985687010132, "learning_rate": 8.565461652401155e-06, "loss": 17.6975, "step": 14765 }, { "epoch": 0.2699106147293765, "grad_norm": 6.320444788794005, "learning_rate": 8.565254121305318e-06, "loss": 17.2421, "step": 14766 }, { "epoch": 0.26992889392582303, "grad_norm": 7.469850399429338, "learning_rate": 8.565046577713576e-06, "loss": 17.7894, "step": 14767 }, { "epoch": 0.26994717312226957, "grad_norm": 8.681383457387533, "learning_rate": 8.564839021626653e-06, "loss": 18.0657, "step": 14768 }, { "epoch": 0.26996545231871605, "grad_norm": 6.303465573667526, "learning_rate": 8.564631453045283e-06, "loss": 17.695, "step": 14769 }, { "epoch": 0.2699837315151626, "grad_norm": 6.587916498995225, "learning_rate": 8.564423871970185e-06, "loss": 17.5307, "step": 14770 }, { "epoch": 0.2700020107116091, "grad_norm": 8.782459938025932, "learning_rate": 8.564216278402092e-06, "loss": 18.4805, "step": 14771 }, { "epoch": 0.27002028990805566, "grad_norm": 6.264035444124062, "learning_rate": 8.564008672341731e-06, "loss": 17.5215, "step": 14772 }, { "epoch": 0.2700385691045022, "grad_norm": 6.1909731522116225, "learning_rate": 8.563801053789828e-06, "loss": 17.3149, "step": 14773 }, { "epoch": 0.27005684830094867, "grad_norm": 7.125301342293887, "learning_rate": 8.563593422747112e-06, "loss": 18.0346, "step": 14774 }, { "epoch": 0.2700751274973952, "grad_norm": 8.09012375487751, "learning_rate": 8.563385779214308e-06, "loss": 18.1556, "step": 14775 }, { "epoch": 0.27009340669384174, "grad_norm": 6.838611555632258, "learning_rate": 8.563178123192148e-06, "loss": 17.4825, "step": 14776 }, { "epoch": 0.2701116858902883, "grad_norm": 7.091593604846901, "learning_rate": 8.562970454681359e-06, "loss": 17.9692, "step": 14777 }, { "epoch": 0.2701299650867348, "grad_norm": 27.42424937940139, "learning_rate": 8.562762773682667e-06, "loss": 18.6833, "step": 14778 }, { "epoch": 0.2701482442831813, "grad_norm": 6.105921118984554, "learning_rate": 8.5625550801968e-06, "loss": 17.5999, "step": 14779 }, { "epoch": 0.27016652347962783, "grad_norm": 7.891322010519658, "learning_rate": 8.562347374224487e-06, "loss": 18.0696, "step": 14780 }, { "epoch": 0.27018480267607436, "grad_norm": 5.089558546095618, "learning_rate": 8.562139655766456e-06, "loss": 17.0662, "step": 14781 }, { "epoch": 0.2702030818725209, "grad_norm": 6.964422777174042, "learning_rate": 8.561931924823433e-06, "loss": 17.9785, "step": 14782 }, { "epoch": 0.2702213610689674, "grad_norm": 6.644066222632282, "learning_rate": 8.56172418139615e-06, "loss": 17.6049, "step": 14783 }, { "epoch": 0.2702396402654139, "grad_norm": 6.717271400281902, "learning_rate": 8.56151642548533e-06, "loss": 17.7115, "step": 14784 }, { "epoch": 0.27025791946186045, "grad_norm": 6.717954264561899, "learning_rate": 8.561308657091707e-06, "loss": 17.4734, "step": 14785 }, { "epoch": 0.270276198658307, "grad_norm": 7.859560363688032, "learning_rate": 8.561100876216004e-06, "loss": 18.0413, "step": 14786 }, { "epoch": 0.2702944778547535, "grad_norm": 6.054518782247669, "learning_rate": 8.560893082858952e-06, "loss": 17.5724, "step": 14787 }, { "epoch": 0.2703127570512, "grad_norm": 6.357519366751193, "learning_rate": 8.560685277021278e-06, "loss": 17.5095, "step": 14788 }, { "epoch": 0.27033103624764654, "grad_norm": 6.419423602347501, "learning_rate": 8.560477458703714e-06, "loss": 17.3069, "step": 14789 }, { "epoch": 0.2703493154440931, "grad_norm": 5.996425424062401, "learning_rate": 8.560269627906983e-06, "loss": 17.3726, "step": 14790 }, { "epoch": 0.2703675946405396, "grad_norm": 7.306677999264691, "learning_rate": 8.560061784631816e-06, "loss": 18.0985, "step": 14791 }, { "epoch": 0.27038587383698615, "grad_norm": 7.643579111586635, "learning_rate": 8.559853928878941e-06, "loss": 18.2205, "step": 14792 }, { "epoch": 0.2704041530334326, "grad_norm": 7.4952996707611, "learning_rate": 8.559646060649086e-06, "loss": 18.0018, "step": 14793 }, { "epoch": 0.27042243222987916, "grad_norm": 6.56709098086221, "learning_rate": 8.559438179942985e-06, "loss": 17.5855, "step": 14794 }, { "epoch": 0.2704407114263257, "grad_norm": 6.906590166390661, "learning_rate": 8.559230286761359e-06, "loss": 17.7104, "step": 14795 }, { "epoch": 0.27045899062277223, "grad_norm": 7.337185897035148, "learning_rate": 8.55902238110494e-06, "loss": 17.888, "step": 14796 }, { "epoch": 0.27047726981921877, "grad_norm": 7.5890777682589965, "learning_rate": 8.558814462974457e-06, "loss": 17.9747, "step": 14797 }, { "epoch": 0.27049554901566525, "grad_norm": 6.966595784185228, "learning_rate": 8.558606532370635e-06, "loss": 17.587, "step": 14798 }, { "epoch": 0.2705138282121118, "grad_norm": 6.432904766596979, "learning_rate": 8.55839858929421e-06, "loss": 17.7732, "step": 14799 }, { "epoch": 0.2705321074085583, "grad_norm": 6.468720967410889, "learning_rate": 8.558190633745904e-06, "loss": 17.4364, "step": 14800 }, { "epoch": 0.27055038660500486, "grad_norm": 7.185045518131907, "learning_rate": 8.55798266572645e-06, "loss": 17.6472, "step": 14801 }, { "epoch": 0.2705686658014514, "grad_norm": 5.51531722764198, "learning_rate": 8.557774685236575e-06, "loss": 17.1683, "step": 14802 }, { "epoch": 0.27058694499789787, "grad_norm": 8.265389929871407, "learning_rate": 8.557566692277008e-06, "loss": 18.4747, "step": 14803 }, { "epoch": 0.2706052241943444, "grad_norm": 6.174231177908045, "learning_rate": 8.55735868684848e-06, "loss": 17.317, "step": 14804 }, { "epoch": 0.27062350339079094, "grad_norm": 5.838048605328569, "learning_rate": 8.557150668951717e-06, "loss": 17.4088, "step": 14805 }, { "epoch": 0.2706417825872375, "grad_norm": 6.0436949025897615, "learning_rate": 8.55694263858745e-06, "loss": 17.2727, "step": 14806 }, { "epoch": 0.270660061783684, "grad_norm": 6.548063149899086, "learning_rate": 8.556734595756409e-06, "loss": 17.5251, "step": 14807 }, { "epoch": 0.2706783409801305, "grad_norm": 8.060561911075787, "learning_rate": 8.55652654045932e-06, "loss": 18.448, "step": 14808 }, { "epoch": 0.27069662017657703, "grad_norm": 6.624057521585169, "learning_rate": 8.556318472696915e-06, "loss": 17.572, "step": 14809 }, { "epoch": 0.27071489937302357, "grad_norm": 6.718039617867972, "learning_rate": 8.556110392469923e-06, "loss": 17.7396, "step": 14810 }, { "epoch": 0.2707331785694701, "grad_norm": 6.602064901695705, "learning_rate": 8.555902299779071e-06, "loss": 17.6202, "step": 14811 }, { "epoch": 0.27075145776591664, "grad_norm": 5.040929278256974, "learning_rate": 8.55569419462509e-06, "loss": 17.0216, "step": 14812 }, { "epoch": 0.2707697369623631, "grad_norm": 5.99684895534528, "learning_rate": 8.55548607700871e-06, "loss": 17.267, "step": 14813 }, { "epoch": 0.27078801615880965, "grad_norm": 7.119770017639812, "learning_rate": 8.55527794693066e-06, "loss": 18.0201, "step": 14814 }, { "epoch": 0.2708062953552562, "grad_norm": 6.668933993092999, "learning_rate": 8.55506980439167e-06, "loss": 17.7468, "step": 14815 }, { "epoch": 0.2708245745517027, "grad_norm": 7.787840570922672, "learning_rate": 8.554861649392468e-06, "loss": 17.9452, "step": 14816 }, { "epoch": 0.2708428537481492, "grad_norm": 9.98440755798373, "learning_rate": 8.554653481933784e-06, "loss": 18.604, "step": 14817 }, { "epoch": 0.27086113294459574, "grad_norm": 6.193246128929784, "learning_rate": 8.55444530201635e-06, "loss": 17.3345, "step": 14818 }, { "epoch": 0.2708794121410423, "grad_norm": 6.473158814119927, "learning_rate": 8.554237109640891e-06, "loss": 17.4942, "step": 14819 }, { "epoch": 0.2708976913374888, "grad_norm": 5.477792509880523, "learning_rate": 8.554028904808141e-06, "loss": 17.167, "step": 14820 }, { "epoch": 0.27091597053393535, "grad_norm": 5.824733941489929, "learning_rate": 8.553820687518828e-06, "loss": 17.1212, "step": 14821 }, { "epoch": 0.2709342497303818, "grad_norm": 6.969990034486477, "learning_rate": 8.553612457773681e-06, "loss": 17.816, "step": 14822 }, { "epoch": 0.27095252892682836, "grad_norm": 7.040876437065263, "learning_rate": 8.55340421557343e-06, "loss": 17.7973, "step": 14823 }, { "epoch": 0.2709708081232749, "grad_norm": 6.981931948666637, "learning_rate": 8.553195960918808e-06, "loss": 17.7812, "step": 14824 }, { "epoch": 0.27098908731972143, "grad_norm": 17.040261005387123, "learning_rate": 8.552987693810542e-06, "loss": 17.6161, "step": 14825 }, { "epoch": 0.27100736651616797, "grad_norm": 6.440303304400119, "learning_rate": 8.552779414249362e-06, "loss": 17.7299, "step": 14826 }, { "epoch": 0.27102564571261445, "grad_norm": 6.066347738116784, "learning_rate": 8.552571122235998e-06, "loss": 17.4761, "step": 14827 }, { "epoch": 0.271043924909061, "grad_norm": 6.101927145793574, "learning_rate": 8.55236281777118e-06, "loss": 17.3433, "step": 14828 }, { "epoch": 0.2710622041055075, "grad_norm": 7.106033455011255, "learning_rate": 8.55215450085564e-06, "loss": 18.0241, "step": 14829 }, { "epoch": 0.27108048330195406, "grad_norm": 5.42307002578256, "learning_rate": 8.551946171490107e-06, "loss": 17.2391, "step": 14830 }, { "epoch": 0.2710987624984006, "grad_norm": 5.149141178645675, "learning_rate": 8.551737829675309e-06, "loss": 16.998, "step": 14831 }, { "epoch": 0.27111704169484707, "grad_norm": 5.687623706865836, "learning_rate": 8.551529475411979e-06, "loss": 17.3505, "step": 14832 }, { "epoch": 0.2711353208912936, "grad_norm": 6.000635344331766, "learning_rate": 8.551321108700846e-06, "loss": 17.1844, "step": 14833 }, { "epoch": 0.27115360008774014, "grad_norm": 7.146910464294515, "learning_rate": 8.551112729542642e-06, "loss": 17.7004, "step": 14834 }, { "epoch": 0.2711718792841867, "grad_norm": 6.072344435017131, "learning_rate": 8.550904337938095e-06, "loss": 17.3035, "step": 14835 }, { "epoch": 0.2711901584806332, "grad_norm": 5.883160755130152, "learning_rate": 8.550695933887935e-06, "loss": 17.3569, "step": 14836 }, { "epoch": 0.2712084376770797, "grad_norm": 6.013574719384061, "learning_rate": 8.550487517392896e-06, "loss": 17.2092, "step": 14837 }, { "epoch": 0.27122671687352623, "grad_norm": 6.581247721001421, "learning_rate": 8.550279088453705e-06, "loss": 17.361, "step": 14838 }, { "epoch": 0.27124499606997277, "grad_norm": 7.229041708791945, "learning_rate": 8.550070647071095e-06, "loss": 17.9663, "step": 14839 }, { "epoch": 0.2712632752664193, "grad_norm": 9.233208225351778, "learning_rate": 8.549862193245795e-06, "loss": 17.8901, "step": 14840 }, { "epoch": 0.27128155446286584, "grad_norm": 7.9748289764601985, "learning_rate": 8.549653726978538e-06, "loss": 18.2539, "step": 14841 }, { "epoch": 0.2712998336593123, "grad_norm": 5.6688321120436385, "learning_rate": 8.549445248270049e-06, "loss": 17.2339, "step": 14842 }, { "epoch": 0.27131811285575885, "grad_norm": 6.736587469374453, "learning_rate": 8.549236757121065e-06, "loss": 17.5542, "step": 14843 }, { "epoch": 0.2713363920522054, "grad_norm": 7.316596212398285, "learning_rate": 8.549028253532313e-06, "loss": 18.1392, "step": 14844 }, { "epoch": 0.2713546712486519, "grad_norm": 7.007929928197967, "learning_rate": 8.548819737504525e-06, "loss": 17.7737, "step": 14845 }, { "epoch": 0.27137295044509846, "grad_norm": 6.586250496167358, "learning_rate": 8.548611209038433e-06, "loss": 17.5552, "step": 14846 }, { "epoch": 0.27139122964154494, "grad_norm": 7.821666686457381, "learning_rate": 8.548402668134766e-06, "loss": 18.471, "step": 14847 }, { "epoch": 0.2714095088379915, "grad_norm": 6.463512499320828, "learning_rate": 8.548194114794255e-06, "loss": 17.4083, "step": 14848 }, { "epoch": 0.271427788034438, "grad_norm": 5.91809828471948, "learning_rate": 8.547985549017633e-06, "loss": 17.3628, "step": 14849 }, { "epoch": 0.27144606723088455, "grad_norm": 5.691880825744473, "learning_rate": 8.547776970805627e-06, "loss": 17.0096, "step": 14850 }, { "epoch": 0.271464346427331, "grad_norm": 5.823969072739495, "learning_rate": 8.547568380158973e-06, "loss": 17.1374, "step": 14851 }, { "epoch": 0.27148262562377756, "grad_norm": 6.396999802354459, "learning_rate": 8.547359777078399e-06, "loss": 17.5505, "step": 14852 }, { "epoch": 0.2715009048202241, "grad_norm": 6.055306693350301, "learning_rate": 8.547151161564636e-06, "loss": 17.3091, "step": 14853 }, { "epoch": 0.27151918401667063, "grad_norm": 6.948563482297491, "learning_rate": 8.546942533618417e-06, "loss": 17.6344, "step": 14854 }, { "epoch": 0.27153746321311717, "grad_norm": 6.777693158596401, "learning_rate": 8.54673389324047e-06, "loss": 17.8987, "step": 14855 }, { "epoch": 0.27155574240956365, "grad_norm": 6.641151041194556, "learning_rate": 8.54652524043153e-06, "loss": 17.8, "step": 14856 }, { "epoch": 0.2715740216060102, "grad_norm": 7.462915438731451, "learning_rate": 8.546316575192328e-06, "loss": 18.3269, "step": 14857 }, { "epoch": 0.2715923008024567, "grad_norm": 6.2508430357152704, "learning_rate": 8.546107897523592e-06, "loss": 17.6674, "step": 14858 }, { "epoch": 0.27161057999890326, "grad_norm": 6.624476504090634, "learning_rate": 8.545899207426056e-06, "loss": 17.6947, "step": 14859 }, { "epoch": 0.2716288591953498, "grad_norm": 6.221072911878215, "learning_rate": 8.545690504900452e-06, "loss": 17.4292, "step": 14860 }, { "epoch": 0.2716471383917963, "grad_norm": 10.603644841939005, "learning_rate": 8.54548178994751e-06, "loss": 18.6189, "step": 14861 }, { "epoch": 0.2716654175882428, "grad_norm": 7.174358900772363, "learning_rate": 8.545273062567963e-06, "loss": 17.9352, "step": 14862 }, { "epoch": 0.27168369678468934, "grad_norm": 6.203643896967865, "learning_rate": 8.54506432276254e-06, "loss": 17.3654, "step": 14863 }, { "epoch": 0.2717019759811359, "grad_norm": 7.270744433630595, "learning_rate": 8.544855570531974e-06, "loss": 17.7381, "step": 14864 }, { "epoch": 0.2717202551775824, "grad_norm": 5.932433514868857, "learning_rate": 8.544646805876999e-06, "loss": 17.3567, "step": 14865 }, { "epoch": 0.2717385343740289, "grad_norm": 5.7418441762364765, "learning_rate": 8.544438028798342e-06, "loss": 16.9767, "step": 14866 }, { "epoch": 0.27175681357047543, "grad_norm": 5.8992892914146635, "learning_rate": 8.54422923929674e-06, "loss": 17.4688, "step": 14867 }, { "epoch": 0.27177509276692197, "grad_norm": 5.4741094203996665, "learning_rate": 8.544020437372919e-06, "loss": 17.1419, "step": 14868 }, { "epoch": 0.2717933719633685, "grad_norm": 8.441886739004868, "learning_rate": 8.543811623027616e-06, "loss": 18.798, "step": 14869 }, { "epoch": 0.27181165115981504, "grad_norm": 7.094430727346486, "learning_rate": 8.54360279626156e-06, "loss": 17.8168, "step": 14870 }, { "epoch": 0.2718299303562615, "grad_norm": 7.59321936874265, "learning_rate": 8.543393957075483e-06, "loss": 17.806, "step": 14871 }, { "epoch": 0.27184820955270805, "grad_norm": 7.263613009008624, "learning_rate": 8.54318510547012e-06, "loss": 18.1295, "step": 14872 }, { "epoch": 0.2718664887491546, "grad_norm": 6.984233747273519, "learning_rate": 8.5429762414462e-06, "loss": 17.5231, "step": 14873 }, { "epoch": 0.2718847679456011, "grad_norm": 5.080683098741494, "learning_rate": 8.542767365004454e-06, "loss": 16.8671, "step": 14874 }, { "epoch": 0.27190304714204766, "grad_norm": 6.434155928121867, "learning_rate": 8.542558476145616e-06, "loss": 17.6545, "step": 14875 }, { "epoch": 0.27192132633849414, "grad_norm": 8.879287344854903, "learning_rate": 8.54234957487042e-06, "loss": 18.6452, "step": 14876 }, { "epoch": 0.2719396055349407, "grad_norm": 6.323199241643357, "learning_rate": 8.542140661179594e-06, "loss": 17.7601, "step": 14877 }, { "epoch": 0.2719578847313872, "grad_norm": 6.301146414422232, "learning_rate": 8.541931735073872e-06, "loss": 17.6795, "step": 14878 }, { "epoch": 0.27197616392783375, "grad_norm": 6.785669327266767, "learning_rate": 8.541722796553988e-06, "loss": 17.5498, "step": 14879 }, { "epoch": 0.2719944431242803, "grad_norm": 5.844865942548047, "learning_rate": 8.541513845620672e-06, "loss": 17.3334, "step": 14880 }, { "epoch": 0.27201272232072676, "grad_norm": 22.634779033249952, "learning_rate": 8.54130488227466e-06, "loss": 17.8193, "step": 14881 }, { "epoch": 0.2720310015171733, "grad_norm": 8.142868092799928, "learning_rate": 8.54109590651668e-06, "loss": 18.1071, "step": 14882 }, { "epoch": 0.27204928071361983, "grad_norm": 8.427618812524283, "learning_rate": 8.540886918347465e-06, "loss": 17.4152, "step": 14883 }, { "epoch": 0.27206755991006637, "grad_norm": 6.93423553369138, "learning_rate": 8.540677917767749e-06, "loss": 17.6729, "step": 14884 }, { "epoch": 0.27208583910651285, "grad_norm": 7.193372060519649, "learning_rate": 8.540468904778265e-06, "loss": 17.6561, "step": 14885 }, { "epoch": 0.2721041183029594, "grad_norm": 8.280421848562657, "learning_rate": 8.540259879379744e-06, "loss": 18.4438, "step": 14886 }, { "epoch": 0.2721223974994059, "grad_norm": 5.945522270852512, "learning_rate": 8.54005084157292e-06, "loss": 17.331, "step": 14887 }, { "epoch": 0.27214067669585246, "grad_norm": 7.74093699090885, "learning_rate": 8.539841791358523e-06, "loss": 17.8281, "step": 14888 }, { "epoch": 0.272158955892299, "grad_norm": 7.664311082329237, "learning_rate": 8.539632728737291e-06, "loss": 17.9029, "step": 14889 }, { "epoch": 0.2721772350887455, "grad_norm": 5.472508028003336, "learning_rate": 8.53942365370995e-06, "loss": 17.1254, "step": 14890 }, { "epoch": 0.272195514285192, "grad_norm": 7.2114970326785395, "learning_rate": 8.539214566277239e-06, "loss": 17.8657, "step": 14891 }, { "epoch": 0.27221379348163854, "grad_norm": 5.760661427336524, "learning_rate": 8.539005466439886e-06, "loss": 17.424, "step": 14892 }, { "epoch": 0.2722320726780851, "grad_norm": 6.507112035057774, "learning_rate": 8.538796354198629e-06, "loss": 17.6589, "step": 14893 }, { "epoch": 0.2722503518745316, "grad_norm": 6.348619332970952, "learning_rate": 8.538587229554195e-06, "loss": 17.5345, "step": 14894 }, { "epoch": 0.2722686310709781, "grad_norm": 8.662099122948792, "learning_rate": 8.53837809250732e-06, "loss": 18.2038, "step": 14895 }, { "epoch": 0.27228691026742463, "grad_norm": 6.92314883051264, "learning_rate": 8.538168943058738e-06, "loss": 17.8408, "step": 14896 }, { "epoch": 0.27230518946387117, "grad_norm": 8.953082316841071, "learning_rate": 8.537959781209181e-06, "loss": 18.1729, "step": 14897 }, { "epoch": 0.2723234686603177, "grad_norm": 7.092600096105889, "learning_rate": 8.537750606959381e-06, "loss": 18.1305, "step": 14898 }, { "epoch": 0.27234174785676424, "grad_norm": 7.241337699361368, "learning_rate": 8.537541420310072e-06, "loss": 17.98, "step": 14899 }, { "epoch": 0.2723600270532107, "grad_norm": 8.802883025943345, "learning_rate": 8.537332221261988e-06, "loss": 18.5611, "step": 14900 }, { "epoch": 0.27237830624965725, "grad_norm": 7.593543267720216, "learning_rate": 8.537123009815861e-06, "loss": 18.0538, "step": 14901 }, { "epoch": 0.2723965854461038, "grad_norm": 5.630264023919765, "learning_rate": 8.536913785972424e-06, "loss": 17.1605, "step": 14902 }, { "epoch": 0.2724148646425503, "grad_norm": 6.380544619337826, "learning_rate": 8.536704549732413e-06, "loss": 17.7363, "step": 14903 }, { "epoch": 0.27243314383899686, "grad_norm": 6.157146247117923, "learning_rate": 8.536495301096559e-06, "loss": 17.3588, "step": 14904 }, { "epoch": 0.27245142303544334, "grad_norm": 6.743080721881028, "learning_rate": 8.536286040065595e-06, "loss": 17.7994, "step": 14905 }, { "epoch": 0.2724697022318899, "grad_norm": 7.064760121129258, "learning_rate": 8.536076766640258e-06, "loss": 18.0931, "step": 14906 }, { "epoch": 0.2724879814283364, "grad_norm": 7.407565010651201, "learning_rate": 8.535867480821275e-06, "loss": 17.8473, "step": 14907 }, { "epoch": 0.27250626062478295, "grad_norm": 7.81441975945342, "learning_rate": 8.535658182609386e-06, "loss": 18.1158, "step": 14908 }, { "epoch": 0.2725245398212295, "grad_norm": 6.045435312495618, "learning_rate": 8.53544887200532e-06, "loss": 17.3503, "step": 14909 }, { "epoch": 0.27254281901767596, "grad_norm": 5.707968340287296, "learning_rate": 8.535239549009813e-06, "loss": 17.099, "step": 14910 }, { "epoch": 0.2725610982141225, "grad_norm": 6.066638919619541, "learning_rate": 8.535030213623599e-06, "loss": 17.2943, "step": 14911 }, { "epoch": 0.27257937741056903, "grad_norm": 5.900745626891175, "learning_rate": 8.53482086584741e-06, "loss": 17.4147, "step": 14912 }, { "epoch": 0.27259765660701557, "grad_norm": 6.370327324649582, "learning_rate": 8.53461150568198e-06, "loss": 17.4563, "step": 14913 }, { "epoch": 0.2726159358034621, "grad_norm": 8.7428366840071, "learning_rate": 8.534402133128044e-06, "loss": 18.1068, "step": 14914 }, { "epoch": 0.2726342149999086, "grad_norm": 7.311969184989779, "learning_rate": 8.534192748186337e-06, "loss": 18.1549, "step": 14915 }, { "epoch": 0.2726524941963551, "grad_norm": 7.222571937555025, "learning_rate": 8.53398335085759e-06, "loss": 17.9561, "step": 14916 }, { "epoch": 0.27267077339280166, "grad_norm": 6.817968807815459, "learning_rate": 8.533773941142535e-06, "loss": 17.6033, "step": 14917 }, { "epoch": 0.2726890525892482, "grad_norm": 6.645826784715755, "learning_rate": 8.533564519041913e-06, "loss": 17.4718, "step": 14918 }, { "epoch": 0.2727073317856947, "grad_norm": 7.061940441232614, "learning_rate": 8.533355084556452e-06, "loss": 17.3461, "step": 14919 }, { "epoch": 0.2727256109821412, "grad_norm": 9.420514471152142, "learning_rate": 8.533145637686889e-06, "loss": 18.8242, "step": 14920 }, { "epoch": 0.27274389017858774, "grad_norm": 5.597796718640966, "learning_rate": 8.532936178433958e-06, "loss": 16.9579, "step": 14921 }, { "epoch": 0.2727621693750343, "grad_norm": 6.1370517575521735, "learning_rate": 8.53272670679839e-06, "loss": 17.1182, "step": 14922 }, { "epoch": 0.2727804485714808, "grad_norm": 5.938901037287833, "learning_rate": 8.532517222780922e-06, "loss": 17.2498, "step": 14923 }, { "epoch": 0.2727987277679273, "grad_norm": 7.485266218181458, "learning_rate": 8.532307726382288e-06, "loss": 18.0095, "step": 14924 }, { "epoch": 0.27281700696437383, "grad_norm": 7.9292761561775835, "learning_rate": 8.532098217603222e-06, "loss": 18.264, "step": 14925 }, { "epoch": 0.27283528616082037, "grad_norm": 8.333234729593757, "learning_rate": 8.53188869644446e-06, "loss": 17.9407, "step": 14926 }, { "epoch": 0.2728535653572669, "grad_norm": 7.081977271341544, "learning_rate": 8.531679162906732e-06, "loss": 17.4561, "step": 14927 }, { "epoch": 0.27287184455371344, "grad_norm": 6.116784005544873, "learning_rate": 8.531469616990776e-06, "loss": 17.3566, "step": 14928 }, { "epoch": 0.2728901237501599, "grad_norm": 5.5221505721350175, "learning_rate": 8.531260058697326e-06, "loss": 17.4277, "step": 14929 }, { "epoch": 0.27290840294660645, "grad_norm": 7.0237127520366425, "learning_rate": 8.531050488027115e-06, "loss": 17.7201, "step": 14930 }, { "epoch": 0.272926682143053, "grad_norm": 9.834204976154917, "learning_rate": 8.530840904980878e-06, "loss": 18.8974, "step": 14931 }, { "epoch": 0.2729449613394995, "grad_norm": 7.53462570626672, "learning_rate": 8.53063130955935e-06, "loss": 17.8331, "step": 14932 }, { "epoch": 0.27296324053594606, "grad_norm": 10.658949058256267, "learning_rate": 8.530421701763268e-06, "loss": 17.5252, "step": 14933 }, { "epoch": 0.27298151973239254, "grad_norm": 6.624393401300358, "learning_rate": 8.530212081593362e-06, "loss": 17.5098, "step": 14934 }, { "epoch": 0.2729997989288391, "grad_norm": 7.940640834054324, "learning_rate": 8.53000244905037e-06, "loss": 18.0564, "step": 14935 }, { "epoch": 0.2730180781252856, "grad_norm": 6.292335902678796, "learning_rate": 8.529792804135025e-06, "loss": 17.5755, "step": 14936 }, { "epoch": 0.27303635732173215, "grad_norm": 7.781507536910704, "learning_rate": 8.529583146848063e-06, "loss": 17.5398, "step": 14937 }, { "epoch": 0.2730546365181787, "grad_norm": 6.929292405792139, "learning_rate": 8.529373477190218e-06, "loss": 17.5629, "step": 14938 }, { "epoch": 0.27307291571462516, "grad_norm": 5.940024667463652, "learning_rate": 8.529163795162225e-06, "loss": 17.3973, "step": 14939 }, { "epoch": 0.2730911949110717, "grad_norm": 6.528807380553815, "learning_rate": 8.528954100764822e-06, "loss": 17.619, "step": 14940 }, { "epoch": 0.27310947410751824, "grad_norm": 8.385382036274383, "learning_rate": 8.528744393998736e-06, "loss": 18.0734, "step": 14941 }, { "epoch": 0.27312775330396477, "grad_norm": 6.400757498934926, "learning_rate": 8.52853467486471e-06, "loss": 17.0158, "step": 14942 }, { "epoch": 0.2731460325004113, "grad_norm": 7.138467067392454, "learning_rate": 8.528324943363477e-06, "loss": 17.5021, "step": 14943 }, { "epoch": 0.2731643116968578, "grad_norm": 5.918351364852236, "learning_rate": 8.52811519949577e-06, "loss": 17.325, "step": 14944 }, { "epoch": 0.2731825908933043, "grad_norm": 6.330543141395617, "learning_rate": 8.527905443262325e-06, "loss": 17.5151, "step": 14945 }, { "epoch": 0.27320087008975086, "grad_norm": 6.996704629960327, "learning_rate": 8.527695674663878e-06, "loss": 17.7408, "step": 14946 }, { "epoch": 0.2732191492861974, "grad_norm": 8.191466999963437, "learning_rate": 8.527485893701166e-06, "loss": 17.0555, "step": 14947 }, { "epoch": 0.27323742848264393, "grad_norm": 5.806599569310809, "learning_rate": 8.527276100374919e-06, "loss": 17.3758, "step": 14948 }, { "epoch": 0.2732557076790904, "grad_norm": 7.703322244771542, "learning_rate": 8.527066294685878e-06, "loss": 18.212, "step": 14949 }, { "epoch": 0.27327398687553695, "grad_norm": 20.52918467256652, "learning_rate": 8.526856476634773e-06, "loss": 18.5831, "step": 14950 }, { "epoch": 0.2732922660719835, "grad_norm": 6.8727490222692795, "learning_rate": 8.526646646222343e-06, "loss": 17.6496, "step": 14951 }, { "epoch": 0.27331054526843, "grad_norm": 7.175407514213813, "learning_rate": 8.526436803449323e-06, "loss": 17.4754, "step": 14952 }, { "epoch": 0.2733288244648765, "grad_norm": 6.653790660641926, "learning_rate": 8.526226948316447e-06, "loss": 17.6719, "step": 14953 }, { "epoch": 0.27334710366132303, "grad_norm": 5.722892147579569, "learning_rate": 8.526017080824452e-06, "loss": 17.2459, "step": 14954 }, { "epoch": 0.27336538285776957, "grad_norm": 6.279245110003837, "learning_rate": 8.525807200974074e-06, "loss": 17.3389, "step": 14955 }, { "epoch": 0.2733836620542161, "grad_norm": 7.746941569880032, "learning_rate": 8.525597308766047e-06, "loss": 17.9757, "step": 14956 }, { "epoch": 0.27340194125066264, "grad_norm": 5.989310738332474, "learning_rate": 8.525387404201108e-06, "loss": 17.2385, "step": 14957 }, { "epoch": 0.2734202204471091, "grad_norm": 6.196033537084806, "learning_rate": 8.525177487279992e-06, "loss": 17.2249, "step": 14958 }, { "epoch": 0.27343849964355565, "grad_norm": 7.11327945725399, "learning_rate": 8.524967558003434e-06, "loss": 18.0703, "step": 14959 }, { "epoch": 0.2734567788400022, "grad_norm": 8.4964544098655, "learning_rate": 8.52475761637217e-06, "loss": 17.5121, "step": 14960 }, { "epoch": 0.2734750580364487, "grad_norm": 6.516779618103367, "learning_rate": 8.524547662386937e-06, "loss": 17.4908, "step": 14961 }, { "epoch": 0.27349333723289526, "grad_norm": 9.044453293633982, "learning_rate": 8.52433769604847e-06, "loss": 18.6725, "step": 14962 }, { "epoch": 0.27351161642934174, "grad_norm": 6.709590072747863, "learning_rate": 8.524127717357506e-06, "loss": 17.2904, "step": 14963 }, { "epoch": 0.2735298956257883, "grad_norm": 6.880374789406854, "learning_rate": 8.52391772631478e-06, "loss": 17.7179, "step": 14964 }, { "epoch": 0.2735481748222348, "grad_norm": 7.765052854218366, "learning_rate": 8.52370772292103e-06, "loss": 17.9241, "step": 14965 }, { "epoch": 0.27356645401868135, "grad_norm": 7.258938768054528, "learning_rate": 8.523497707176987e-06, "loss": 17.9309, "step": 14966 }, { "epoch": 0.2735847332151279, "grad_norm": 7.668267933278983, "learning_rate": 8.523287679083393e-06, "loss": 18.2377, "step": 14967 }, { "epoch": 0.27360301241157436, "grad_norm": 6.0395295066238885, "learning_rate": 8.52307763864098e-06, "loss": 17.3118, "step": 14968 }, { "epoch": 0.2736212916080209, "grad_norm": 6.61967279349748, "learning_rate": 8.522867585850484e-06, "loss": 17.5734, "step": 14969 }, { "epoch": 0.27363957080446744, "grad_norm": 9.006013486169277, "learning_rate": 8.522657520712645e-06, "loss": 18.049, "step": 14970 }, { "epoch": 0.27365785000091397, "grad_norm": 6.182940380764518, "learning_rate": 8.522447443228196e-06, "loss": 17.3366, "step": 14971 }, { "epoch": 0.2736761291973605, "grad_norm": 7.791436530074532, "learning_rate": 8.522237353397876e-06, "loss": 18.1704, "step": 14972 }, { "epoch": 0.273694408393807, "grad_norm": 9.39990182708251, "learning_rate": 8.522027251222418e-06, "loss": 18.3011, "step": 14973 }, { "epoch": 0.2737126875902535, "grad_norm": 7.578812576891775, "learning_rate": 8.521817136702561e-06, "loss": 18.2556, "step": 14974 }, { "epoch": 0.27373096678670006, "grad_norm": 5.352122012319598, "learning_rate": 8.52160700983904e-06, "loss": 17.3055, "step": 14975 }, { "epoch": 0.2737492459831466, "grad_norm": 7.664265293018091, "learning_rate": 8.521396870632593e-06, "loss": 18.1554, "step": 14976 }, { "epoch": 0.27376752517959313, "grad_norm": 6.232458080284897, "learning_rate": 8.521186719083954e-06, "loss": 17.4724, "step": 14977 }, { "epoch": 0.2737858043760396, "grad_norm": 6.451326554195879, "learning_rate": 8.520976555193862e-06, "loss": 17.4553, "step": 14978 }, { "epoch": 0.27380408357248615, "grad_norm": 7.112308104893728, "learning_rate": 8.520766378963054e-06, "loss": 17.8498, "step": 14979 }, { "epoch": 0.2738223627689327, "grad_norm": 6.854962262606233, "learning_rate": 8.520556190392263e-06, "loss": 17.6454, "step": 14980 }, { "epoch": 0.2738406419653792, "grad_norm": 6.434217395773612, "learning_rate": 8.52034598948223e-06, "loss": 17.3864, "step": 14981 }, { "epoch": 0.27385892116182575, "grad_norm": 7.466742836596856, "learning_rate": 8.520135776233689e-06, "loss": 18.077, "step": 14982 }, { "epoch": 0.27387720035827223, "grad_norm": 8.569752531016313, "learning_rate": 8.519925550647377e-06, "loss": 17.762, "step": 14983 }, { "epoch": 0.27389547955471877, "grad_norm": 6.082117234849648, "learning_rate": 8.519715312724032e-06, "loss": 17.2923, "step": 14984 }, { "epoch": 0.2739137587511653, "grad_norm": 6.103727004459452, "learning_rate": 8.519505062464391e-06, "loss": 17.1734, "step": 14985 }, { "epoch": 0.27393203794761184, "grad_norm": 6.199682998443549, "learning_rate": 8.519294799869189e-06, "loss": 17.4157, "step": 14986 }, { "epoch": 0.2739503171440583, "grad_norm": 9.39093050723225, "learning_rate": 8.519084524939163e-06, "loss": 18.3416, "step": 14987 }, { "epoch": 0.27396859634050486, "grad_norm": 7.2478387446581545, "learning_rate": 8.518874237675053e-06, "loss": 18.1011, "step": 14988 }, { "epoch": 0.2739868755369514, "grad_norm": 6.429645863607817, "learning_rate": 8.518663938077594e-06, "loss": 17.7044, "step": 14989 }, { "epoch": 0.2740051547333979, "grad_norm": 7.608277744380132, "learning_rate": 8.518453626147522e-06, "loss": 17.5516, "step": 14990 }, { "epoch": 0.27402343392984446, "grad_norm": 7.515836219198456, "learning_rate": 8.518243301885577e-06, "loss": 18.1083, "step": 14991 }, { "epoch": 0.27404171312629094, "grad_norm": 7.008733753396864, "learning_rate": 8.518032965292494e-06, "loss": 17.7349, "step": 14992 }, { "epoch": 0.2740599923227375, "grad_norm": 6.56982189128168, "learning_rate": 8.51782261636901e-06, "loss": 17.6305, "step": 14993 }, { "epoch": 0.274078271519184, "grad_norm": 9.070740479244042, "learning_rate": 8.517612255115864e-06, "loss": 17.8937, "step": 14994 }, { "epoch": 0.27409655071563055, "grad_norm": 7.192224862176739, "learning_rate": 8.51740188153379e-06, "loss": 18.0503, "step": 14995 }, { "epoch": 0.2741148299120771, "grad_norm": 6.73445119174964, "learning_rate": 8.517191495623532e-06, "loss": 17.3977, "step": 14996 }, { "epoch": 0.27413310910852356, "grad_norm": 7.535594093343021, "learning_rate": 8.516981097385819e-06, "loss": 17.8374, "step": 14997 }, { "epoch": 0.2741513883049701, "grad_norm": 8.484477246434745, "learning_rate": 8.516770686821394e-06, "loss": 17.9086, "step": 14998 }, { "epoch": 0.27416966750141664, "grad_norm": 6.7194537965221395, "learning_rate": 8.516560263930994e-06, "loss": 17.6165, "step": 14999 }, { "epoch": 0.27418794669786317, "grad_norm": 6.20939998983172, "learning_rate": 8.516349828715354e-06, "loss": 17.3942, "step": 15000 }, { "epoch": 0.2742062258943097, "grad_norm": 6.63079232764498, "learning_rate": 8.516139381175212e-06, "loss": 17.3441, "step": 15001 }, { "epoch": 0.2742245050907562, "grad_norm": 6.460953619207861, "learning_rate": 8.515928921311309e-06, "loss": 17.4968, "step": 15002 }, { "epoch": 0.2742427842872027, "grad_norm": 7.039951232819122, "learning_rate": 8.515718449124378e-06, "loss": 17.429, "step": 15003 }, { "epoch": 0.27426106348364926, "grad_norm": 6.60295901253017, "learning_rate": 8.51550796461516e-06, "loss": 17.5674, "step": 15004 }, { "epoch": 0.2742793426800958, "grad_norm": 6.809811201680587, "learning_rate": 8.515297467784392e-06, "loss": 17.6928, "step": 15005 }, { "epoch": 0.27429762187654233, "grad_norm": 5.919394271802698, "learning_rate": 8.515086958632812e-06, "loss": 17.132, "step": 15006 }, { "epoch": 0.2743159010729888, "grad_norm": 9.171031124987774, "learning_rate": 8.514876437161154e-06, "loss": 18.0037, "step": 15007 }, { "epoch": 0.27433418026943535, "grad_norm": 6.5707004573525785, "learning_rate": 8.514665903370163e-06, "loss": 17.612, "step": 15008 }, { "epoch": 0.2743524594658819, "grad_norm": 7.942019601890254, "learning_rate": 8.514455357260572e-06, "loss": 18.1443, "step": 15009 }, { "epoch": 0.2743707386623284, "grad_norm": 7.361567440445024, "learning_rate": 8.514244798833118e-06, "loss": 17.7264, "step": 15010 }, { "epoch": 0.27438901785877495, "grad_norm": 7.443035909101035, "learning_rate": 8.514034228088542e-06, "loss": 17.4236, "step": 15011 }, { "epoch": 0.27440729705522143, "grad_norm": 6.680721693866458, "learning_rate": 8.513823645027581e-06, "loss": 17.6669, "step": 15012 }, { "epoch": 0.27442557625166797, "grad_norm": 7.444716603510005, "learning_rate": 8.513613049650972e-06, "loss": 17.6669, "step": 15013 }, { "epoch": 0.2744438554481145, "grad_norm": 5.649032063460323, "learning_rate": 8.513402441959457e-06, "loss": 17.1612, "step": 15014 }, { "epoch": 0.27446213464456104, "grad_norm": 7.1527954607750495, "learning_rate": 8.513191821953771e-06, "loss": 17.6386, "step": 15015 }, { "epoch": 0.2744804138410076, "grad_norm": 5.197978034091865, "learning_rate": 8.51298118963465e-06, "loss": 16.9496, "step": 15016 }, { "epoch": 0.27449869303745406, "grad_norm": 5.957658222387719, "learning_rate": 8.512770545002835e-06, "loss": 17.0909, "step": 15017 }, { "epoch": 0.2745169722339006, "grad_norm": 5.579661912686926, "learning_rate": 8.512559888059066e-06, "loss": 17.0729, "step": 15018 }, { "epoch": 0.2745352514303471, "grad_norm": 6.901410980448181, "learning_rate": 8.51234921880408e-06, "loss": 17.6866, "step": 15019 }, { "epoch": 0.27455353062679366, "grad_norm": 7.105586637830406, "learning_rate": 8.51213853723861e-06, "loss": 17.481, "step": 15020 }, { "epoch": 0.27457180982324014, "grad_norm": 6.268941979747861, "learning_rate": 8.511927843363403e-06, "loss": 17.5731, "step": 15021 }, { "epoch": 0.2745900890196867, "grad_norm": 6.8508749311522745, "learning_rate": 8.511717137179193e-06, "loss": 17.7081, "step": 15022 }, { "epoch": 0.2746083682161332, "grad_norm": 7.883185569147722, "learning_rate": 8.51150641868672e-06, "loss": 17.7303, "step": 15023 }, { "epoch": 0.27462664741257975, "grad_norm": 7.524898587945424, "learning_rate": 8.511295687886721e-06, "loss": 17.9761, "step": 15024 }, { "epoch": 0.2746449266090263, "grad_norm": 5.85153730137204, "learning_rate": 8.511084944779935e-06, "loss": 17.2036, "step": 15025 }, { "epoch": 0.27466320580547277, "grad_norm": 6.219150490546941, "learning_rate": 8.510874189367103e-06, "loss": 17.5252, "step": 15026 }, { "epoch": 0.2746814850019193, "grad_norm": 5.920963708816412, "learning_rate": 8.51066342164896e-06, "loss": 17.2042, "step": 15027 }, { "epoch": 0.27469976419836584, "grad_norm": 7.768715350254114, "learning_rate": 8.510452641626244e-06, "loss": 17.8859, "step": 15028 }, { "epoch": 0.2747180433948124, "grad_norm": 6.978706382257655, "learning_rate": 8.510241849299698e-06, "loss": 17.3981, "step": 15029 }, { "epoch": 0.2747363225912589, "grad_norm": 6.927354688887749, "learning_rate": 8.51003104467006e-06, "loss": 17.3634, "step": 15030 }, { "epoch": 0.2747546017877054, "grad_norm": 6.215084155147541, "learning_rate": 8.509820227738068e-06, "loss": 17.346, "step": 15031 }, { "epoch": 0.2747728809841519, "grad_norm": 7.464315358997942, "learning_rate": 8.50960939850446e-06, "loss": 17.6887, "step": 15032 }, { "epoch": 0.27479116018059846, "grad_norm": 8.080319942077573, "learning_rate": 8.509398556969975e-06, "loss": 18.0081, "step": 15033 }, { "epoch": 0.274809439377045, "grad_norm": 6.1164665928270185, "learning_rate": 8.509187703135352e-06, "loss": 17.149, "step": 15034 }, { "epoch": 0.27482771857349153, "grad_norm": 7.341736505958095, "learning_rate": 8.508976837001331e-06, "loss": 17.5834, "step": 15035 }, { "epoch": 0.274845997769938, "grad_norm": 5.674655937620607, "learning_rate": 8.508765958568651e-06, "loss": 17.2539, "step": 15036 }, { "epoch": 0.27486427696638455, "grad_norm": 6.70835978578212, "learning_rate": 8.508555067838051e-06, "loss": 17.6066, "step": 15037 }, { "epoch": 0.2748825561628311, "grad_norm": 9.383413690301147, "learning_rate": 8.50834416481027e-06, "loss": 18.3229, "step": 15038 }, { "epoch": 0.2749008353592776, "grad_norm": 6.253300911206716, "learning_rate": 8.508133249486048e-06, "loss": 17.358, "step": 15039 }, { "epoch": 0.27491911455572415, "grad_norm": 6.783213990413725, "learning_rate": 8.507922321866122e-06, "loss": 17.7181, "step": 15040 }, { "epoch": 0.27493739375217063, "grad_norm": 6.3578078618944875, "learning_rate": 8.507711381951234e-06, "loss": 17.2029, "step": 15041 }, { "epoch": 0.27495567294861717, "grad_norm": 6.230551192999519, "learning_rate": 8.50750042974212e-06, "loss": 17.4552, "step": 15042 }, { "epoch": 0.2749739521450637, "grad_norm": 7.4337802008058755, "learning_rate": 8.507289465239523e-06, "loss": 17.7231, "step": 15043 }, { "epoch": 0.27499223134151024, "grad_norm": 6.350096799832979, "learning_rate": 8.50707848844418e-06, "loss": 17.2701, "step": 15044 }, { "epoch": 0.2750105105379568, "grad_norm": 6.071829725294881, "learning_rate": 8.506867499356832e-06, "loss": 17.0834, "step": 15045 }, { "epoch": 0.27502878973440326, "grad_norm": 8.824764724261522, "learning_rate": 8.506656497978216e-06, "loss": 18.5188, "step": 15046 }, { "epoch": 0.2750470689308498, "grad_norm": 6.326763097824839, "learning_rate": 8.506445484309075e-06, "loss": 17.5691, "step": 15047 }, { "epoch": 0.2750653481272963, "grad_norm": 6.299871779090824, "learning_rate": 8.506234458350146e-06, "loss": 17.5947, "step": 15048 }, { "epoch": 0.27508362732374286, "grad_norm": 6.784974901995447, "learning_rate": 8.50602342010217e-06, "loss": 17.2225, "step": 15049 }, { "epoch": 0.2751019065201894, "grad_norm": 5.913411381000654, "learning_rate": 8.505812369565886e-06, "loss": 17.0461, "step": 15050 }, { "epoch": 0.2751201857166359, "grad_norm": 9.54384824095882, "learning_rate": 8.505601306742035e-06, "loss": 17.6065, "step": 15051 }, { "epoch": 0.2751384649130824, "grad_norm": 8.708857677923003, "learning_rate": 8.505390231631354e-06, "loss": 18.7173, "step": 15052 }, { "epoch": 0.27515674410952895, "grad_norm": 6.889078884155962, "learning_rate": 8.505179144234583e-06, "loss": 17.5667, "step": 15053 }, { "epoch": 0.2751750233059755, "grad_norm": 6.497347007870209, "learning_rate": 8.504968044552466e-06, "loss": 17.6546, "step": 15054 }, { "epoch": 0.27519330250242197, "grad_norm": 6.421667153551877, "learning_rate": 8.50475693258574e-06, "loss": 17.5946, "step": 15055 }, { "epoch": 0.2752115816988685, "grad_norm": 5.667448860596541, "learning_rate": 8.504545808335144e-06, "loss": 16.8418, "step": 15056 }, { "epoch": 0.27522986089531504, "grad_norm": 6.312400458371548, "learning_rate": 8.50433467180142e-06, "loss": 17.1851, "step": 15057 }, { "epoch": 0.2752481400917616, "grad_norm": 8.135814509613942, "learning_rate": 8.504123522985306e-06, "loss": 18.0047, "step": 15058 }, { "epoch": 0.2752664192882081, "grad_norm": 6.2714951204111165, "learning_rate": 8.503912361887543e-06, "loss": 17.4653, "step": 15059 }, { "epoch": 0.2752846984846546, "grad_norm": 6.249592788376225, "learning_rate": 8.503701188508872e-06, "loss": 17.3866, "step": 15060 }, { "epoch": 0.2753029776811011, "grad_norm": 9.173638770203379, "learning_rate": 8.503490002850032e-06, "loss": 18.9345, "step": 15061 }, { "epoch": 0.27532125687754766, "grad_norm": 6.725246630707651, "learning_rate": 8.503278804911763e-06, "loss": 17.5321, "step": 15062 }, { "epoch": 0.2753395360739942, "grad_norm": 6.304788185687925, "learning_rate": 8.503067594694807e-06, "loss": 17.5342, "step": 15063 }, { "epoch": 0.27535781527044073, "grad_norm": 6.899815264393031, "learning_rate": 8.502856372199903e-06, "loss": 17.7755, "step": 15064 }, { "epoch": 0.2753760944668872, "grad_norm": 7.4373658126487525, "learning_rate": 8.502645137427793e-06, "loss": 18.2004, "step": 15065 }, { "epoch": 0.27539437366333375, "grad_norm": 6.685823341875556, "learning_rate": 8.502433890379212e-06, "loss": 17.7411, "step": 15066 }, { "epoch": 0.2754126528597803, "grad_norm": 6.258873398752116, "learning_rate": 8.502222631054906e-06, "loss": 17.3507, "step": 15067 }, { "epoch": 0.2754309320562268, "grad_norm": 6.580744993073601, "learning_rate": 8.502011359455613e-06, "loss": 17.6622, "step": 15068 }, { "epoch": 0.27544921125267335, "grad_norm": 5.382922333129405, "learning_rate": 8.501800075582073e-06, "loss": 16.902, "step": 15069 }, { "epoch": 0.27546749044911983, "grad_norm": 6.387901150632027, "learning_rate": 8.501588779435031e-06, "loss": 17.3893, "step": 15070 }, { "epoch": 0.27548576964556637, "grad_norm": 6.282565559703935, "learning_rate": 8.501377471015222e-06, "loss": 17.4775, "step": 15071 }, { "epoch": 0.2755040488420129, "grad_norm": 8.347736709520138, "learning_rate": 8.501166150323389e-06, "loss": 18.3767, "step": 15072 }, { "epoch": 0.27552232803845944, "grad_norm": 6.133801029727497, "learning_rate": 8.500954817360271e-06, "loss": 17.4881, "step": 15073 }, { "epoch": 0.275540607234906, "grad_norm": 6.7079679931503025, "learning_rate": 8.500743472126613e-06, "loss": 17.2911, "step": 15074 }, { "epoch": 0.27555888643135246, "grad_norm": 6.837401980531877, "learning_rate": 8.500532114623152e-06, "loss": 17.5818, "step": 15075 }, { "epoch": 0.275577165627799, "grad_norm": 7.802182273194156, "learning_rate": 8.50032074485063e-06, "loss": 17.8849, "step": 15076 }, { "epoch": 0.27559544482424553, "grad_norm": 6.60104311402147, "learning_rate": 8.500109362809785e-06, "loss": 17.4672, "step": 15077 }, { "epoch": 0.27561372402069206, "grad_norm": 7.163784549768292, "learning_rate": 8.499897968501363e-06, "loss": 17.4801, "step": 15078 }, { "epoch": 0.2756320032171386, "grad_norm": 5.791770037994613, "learning_rate": 8.499686561926102e-06, "loss": 17.0722, "step": 15079 }, { "epoch": 0.2756502824135851, "grad_norm": 8.259606438480516, "learning_rate": 8.499475143084743e-06, "loss": 18.5193, "step": 15080 }, { "epoch": 0.2756685616100316, "grad_norm": 9.952855206016688, "learning_rate": 8.499263711978027e-06, "loss": 18.9349, "step": 15081 }, { "epoch": 0.27568684080647815, "grad_norm": 7.221818700686994, "learning_rate": 8.499052268606696e-06, "loss": 18.1122, "step": 15082 }, { "epoch": 0.2757051200029247, "grad_norm": 5.80364490686129, "learning_rate": 8.49884081297149e-06, "loss": 17.311, "step": 15083 }, { "epoch": 0.2757233991993712, "grad_norm": 6.391870139942319, "learning_rate": 8.49862934507315e-06, "loss": 17.7093, "step": 15084 }, { "epoch": 0.2757416783958177, "grad_norm": 11.325604702446954, "learning_rate": 8.498417864912418e-06, "loss": 18.6128, "step": 15085 }, { "epoch": 0.27575995759226424, "grad_norm": 6.693411345117484, "learning_rate": 8.498206372490036e-06, "loss": 17.5917, "step": 15086 }, { "epoch": 0.2757782367887108, "grad_norm": 6.856529657181099, "learning_rate": 8.497994867806743e-06, "loss": 17.7577, "step": 15087 }, { "epoch": 0.2757965159851573, "grad_norm": 6.169603565284105, "learning_rate": 8.497783350863282e-06, "loss": 17.3158, "step": 15088 }, { "epoch": 0.2758147951816038, "grad_norm": 6.683157507006591, "learning_rate": 8.497571821660393e-06, "loss": 17.6057, "step": 15089 }, { "epoch": 0.2758330743780503, "grad_norm": 6.776299964282804, "learning_rate": 8.497360280198819e-06, "loss": 17.5894, "step": 15090 }, { "epoch": 0.27585135357449686, "grad_norm": 6.663756349873055, "learning_rate": 8.497148726479302e-06, "loss": 17.6064, "step": 15091 }, { "epoch": 0.2758696327709434, "grad_norm": 5.606218917028794, "learning_rate": 8.49693716050258e-06, "loss": 17.1346, "step": 15092 }, { "epoch": 0.27588791196738993, "grad_norm": 6.761619243793991, "learning_rate": 8.496725582269399e-06, "loss": 17.8594, "step": 15093 }, { "epoch": 0.2759061911638364, "grad_norm": 7.383838866864371, "learning_rate": 8.496513991780496e-06, "loss": 17.9355, "step": 15094 }, { "epoch": 0.27592447036028295, "grad_norm": 5.878574225581679, "learning_rate": 8.496302389036618e-06, "loss": 17.3235, "step": 15095 }, { "epoch": 0.2759427495567295, "grad_norm": 7.045753771894859, "learning_rate": 8.4960907740385e-06, "loss": 17.9473, "step": 15096 }, { "epoch": 0.275961028753176, "grad_norm": 6.874159400949003, "learning_rate": 8.495879146786888e-06, "loss": 17.6823, "step": 15097 }, { "epoch": 0.27597930794962255, "grad_norm": 5.931777682275133, "learning_rate": 8.495667507282523e-06, "loss": 17.2954, "step": 15098 }, { "epoch": 0.27599758714606903, "grad_norm": 5.658249804303634, "learning_rate": 8.495455855526149e-06, "loss": 17.437, "step": 15099 }, { "epoch": 0.27601586634251557, "grad_norm": 7.709657203321893, "learning_rate": 8.495244191518503e-06, "loss": 18.0923, "step": 15100 }, { "epoch": 0.2760341455389621, "grad_norm": 6.419851040462697, "learning_rate": 8.495032515260332e-06, "loss": 17.3243, "step": 15101 }, { "epoch": 0.27605242473540864, "grad_norm": 6.235144609086725, "learning_rate": 8.494820826752373e-06, "loss": 17.1859, "step": 15102 }, { "epoch": 0.2760707039318552, "grad_norm": 6.224796415996176, "learning_rate": 8.49460912599537e-06, "loss": 17.5906, "step": 15103 }, { "epoch": 0.27608898312830166, "grad_norm": 6.818567391082437, "learning_rate": 8.494397412990064e-06, "loss": 17.7974, "step": 15104 }, { "epoch": 0.2761072623247482, "grad_norm": 6.345565752759843, "learning_rate": 8.494185687737202e-06, "loss": 17.3961, "step": 15105 }, { "epoch": 0.27612554152119473, "grad_norm": 6.181317681537005, "learning_rate": 8.49397395023752e-06, "loss": 17.3983, "step": 15106 }, { "epoch": 0.27614382071764126, "grad_norm": 5.922731987022923, "learning_rate": 8.493762200491764e-06, "loss": 17.3817, "step": 15107 }, { "epoch": 0.2761620999140878, "grad_norm": 7.005057570083185, "learning_rate": 8.493550438500674e-06, "loss": 17.7266, "step": 15108 }, { "epoch": 0.2761803791105343, "grad_norm": 6.056443598234483, "learning_rate": 8.493338664264994e-06, "loss": 17.4693, "step": 15109 }, { "epoch": 0.2761986583069808, "grad_norm": 6.870748091461127, "learning_rate": 8.493126877785462e-06, "loss": 17.5765, "step": 15110 }, { "epoch": 0.27621693750342735, "grad_norm": 6.255024517094979, "learning_rate": 8.492915079062825e-06, "loss": 17.3604, "step": 15111 }, { "epoch": 0.2762352166998739, "grad_norm": 7.560081869785773, "learning_rate": 8.492703268097826e-06, "loss": 17.9092, "step": 15112 }, { "epoch": 0.2762534958963204, "grad_norm": 6.943035687647867, "learning_rate": 8.492491444891202e-06, "loss": 17.582, "step": 15113 }, { "epoch": 0.2762717750927669, "grad_norm": 6.237283884131271, "learning_rate": 8.4922796094437e-06, "loss": 17.3341, "step": 15114 }, { "epoch": 0.27629005428921344, "grad_norm": 8.386729445077309, "learning_rate": 8.49206776175606e-06, "loss": 18.5546, "step": 15115 }, { "epoch": 0.27630833348566, "grad_norm": 6.636602832320478, "learning_rate": 8.491855901829028e-06, "loss": 17.4801, "step": 15116 }, { "epoch": 0.2763266126821065, "grad_norm": 5.739313425652385, "learning_rate": 8.491644029663342e-06, "loss": 17.1745, "step": 15117 }, { "epoch": 0.27634489187855305, "grad_norm": 5.728039251304245, "learning_rate": 8.491432145259746e-06, "loss": 17.0786, "step": 15118 }, { "epoch": 0.2763631710749995, "grad_norm": 6.301290758430111, "learning_rate": 8.491220248618985e-06, "loss": 17.5457, "step": 15119 }, { "epoch": 0.27638145027144606, "grad_norm": 7.303871962582155, "learning_rate": 8.4910083397418e-06, "loss": 18.0031, "step": 15120 }, { "epoch": 0.2763997294678926, "grad_norm": 6.767786507919163, "learning_rate": 8.490796418628933e-06, "loss": 17.4871, "step": 15121 }, { "epoch": 0.27641800866433913, "grad_norm": 7.215094564305718, "learning_rate": 8.490584485281126e-06, "loss": 17.8481, "step": 15122 }, { "epoch": 0.2764362878607856, "grad_norm": 6.125606787595317, "learning_rate": 8.490372539699125e-06, "loss": 17.2545, "step": 15123 }, { "epoch": 0.27645456705723215, "grad_norm": 7.274970598348587, "learning_rate": 8.49016058188367e-06, "loss": 17.6086, "step": 15124 }, { "epoch": 0.2764728462536787, "grad_norm": 7.0521901233773745, "learning_rate": 8.489948611835507e-06, "loss": 17.9983, "step": 15125 }, { "epoch": 0.2764911254501252, "grad_norm": 7.406438818444638, "learning_rate": 8.489736629555376e-06, "loss": 17.8855, "step": 15126 }, { "epoch": 0.27650940464657175, "grad_norm": 5.464736523236706, "learning_rate": 8.48952463504402e-06, "loss": 17.2808, "step": 15127 }, { "epoch": 0.27652768384301823, "grad_norm": 6.7985981641731925, "learning_rate": 8.489312628302184e-06, "loss": 17.553, "step": 15128 }, { "epoch": 0.27654596303946477, "grad_norm": 5.7880092429170755, "learning_rate": 8.489100609330611e-06, "loss": 17.5625, "step": 15129 }, { "epoch": 0.2765642422359113, "grad_norm": 7.5237098939421445, "learning_rate": 8.48888857813004e-06, "loss": 17.8427, "step": 15130 }, { "epoch": 0.27658252143235784, "grad_norm": 5.818770988021056, "learning_rate": 8.488676534701222e-06, "loss": 17.4699, "step": 15131 }, { "epoch": 0.2766008006288044, "grad_norm": 7.122951813383191, "learning_rate": 8.48846447904489e-06, "loss": 17.8285, "step": 15132 }, { "epoch": 0.27661907982525086, "grad_norm": 7.38715813034999, "learning_rate": 8.488252411161797e-06, "loss": 18.2292, "step": 15133 }, { "epoch": 0.2766373590216974, "grad_norm": 6.675865547873947, "learning_rate": 8.48804033105268e-06, "loss": 17.6094, "step": 15134 }, { "epoch": 0.27665563821814393, "grad_norm": 6.027580623893715, "learning_rate": 8.487828238718286e-06, "loss": 17.2813, "step": 15135 }, { "epoch": 0.27667391741459046, "grad_norm": 7.023246440827114, "learning_rate": 8.487616134159355e-06, "loss": 17.7784, "step": 15136 }, { "epoch": 0.276692196611037, "grad_norm": 6.092554966001453, "learning_rate": 8.487404017376632e-06, "loss": 17.2843, "step": 15137 }, { "epoch": 0.2767104758074835, "grad_norm": 5.93347495303618, "learning_rate": 8.48719188837086e-06, "loss": 17.4982, "step": 15138 }, { "epoch": 0.27672875500393, "grad_norm": 5.654297826826496, "learning_rate": 8.486979747142785e-06, "loss": 16.9761, "step": 15139 }, { "epoch": 0.27674703420037655, "grad_norm": 6.0145806159862, "learning_rate": 8.486767593693148e-06, "loss": 17.1847, "step": 15140 }, { "epoch": 0.2767653133968231, "grad_norm": 13.957909146183193, "learning_rate": 8.486555428022692e-06, "loss": 18.0061, "step": 15141 }, { "epoch": 0.2767835925932696, "grad_norm": 7.1051146284803135, "learning_rate": 8.486343250132163e-06, "loss": 17.6647, "step": 15142 }, { "epoch": 0.2768018717897161, "grad_norm": 6.318525136817179, "learning_rate": 8.486131060022303e-06, "loss": 17.2715, "step": 15143 }, { "epoch": 0.27682015098616264, "grad_norm": 6.556186221346598, "learning_rate": 8.485918857693855e-06, "loss": 17.885, "step": 15144 }, { "epoch": 0.2768384301826092, "grad_norm": 5.674162468492682, "learning_rate": 8.485706643147567e-06, "loss": 17.1208, "step": 15145 }, { "epoch": 0.2768567093790557, "grad_norm": 7.981543699737078, "learning_rate": 8.485494416384177e-06, "loss": 17.711, "step": 15146 }, { "epoch": 0.27687498857550225, "grad_norm": 5.898058900565279, "learning_rate": 8.485282177404433e-06, "loss": 17.3461, "step": 15147 }, { "epoch": 0.2768932677719487, "grad_norm": 7.49345719971531, "learning_rate": 8.485069926209076e-06, "loss": 17.9474, "step": 15148 }, { "epoch": 0.27691154696839526, "grad_norm": 6.075856025454034, "learning_rate": 8.484857662798853e-06, "loss": 17.3403, "step": 15149 }, { "epoch": 0.2769298261648418, "grad_norm": 8.057692135962697, "learning_rate": 8.484645387174505e-06, "loss": 18.4278, "step": 15150 }, { "epoch": 0.27694810536128833, "grad_norm": 5.72547415657414, "learning_rate": 8.484433099336778e-06, "loss": 17.1654, "step": 15151 }, { "epoch": 0.27696638455773487, "grad_norm": 8.258010290672802, "learning_rate": 8.484220799286414e-06, "loss": 18.2429, "step": 15152 }, { "epoch": 0.27698466375418135, "grad_norm": 6.320224459549836, "learning_rate": 8.484008487024159e-06, "loss": 17.6195, "step": 15153 }, { "epoch": 0.2770029429506279, "grad_norm": 5.809548766519819, "learning_rate": 8.483796162550756e-06, "loss": 17.1871, "step": 15154 }, { "epoch": 0.2770212221470744, "grad_norm": 6.292791614927964, "learning_rate": 8.483583825866952e-06, "loss": 17.5189, "step": 15155 }, { "epoch": 0.27703950134352096, "grad_norm": 7.2897651927097975, "learning_rate": 8.483371476973488e-06, "loss": 17.9668, "step": 15156 }, { "epoch": 0.27705778053996744, "grad_norm": 6.346951510267883, "learning_rate": 8.483159115871109e-06, "loss": 17.4059, "step": 15157 }, { "epoch": 0.27707605973641397, "grad_norm": 8.069388792248683, "learning_rate": 8.48294674256056e-06, "loss": 18.2912, "step": 15158 }, { "epoch": 0.2770943389328605, "grad_norm": 6.267336526982611, "learning_rate": 8.482734357042584e-06, "loss": 17.3161, "step": 15159 }, { "epoch": 0.27711261812930704, "grad_norm": 6.705459898303039, "learning_rate": 8.482521959317926e-06, "loss": 17.3089, "step": 15160 }, { "epoch": 0.2771308973257536, "grad_norm": 5.928807624867987, "learning_rate": 8.482309549387333e-06, "loss": 17.4545, "step": 15161 }, { "epoch": 0.27714917652220006, "grad_norm": 7.814767906037155, "learning_rate": 8.482097127251545e-06, "loss": 17.5292, "step": 15162 }, { "epoch": 0.2771674557186466, "grad_norm": 5.816355538136252, "learning_rate": 8.481884692911308e-06, "loss": 17.2492, "step": 15163 }, { "epoch": 0.27718573491509313, "grad_norm": 7.033786910236613, "learning_rate": 8.48167224636737e-06, "loss": 18.1615, "step": 15164 }, { "epoch": 0.27720401411153966, "grad_norm": 5.430195463635135, "learning_rate": 8.481459787620472e-06, "loss": 17.1092, "step": 15165 }, { "epoch": 0.2772222933079862, "grad_norm": 6.123967341476146, "learning_rate": 8.481247316671358e-06, "loss": 17.4193, "step": 15166 }, { "epoch": 0.2772405725044327, "grad_norm": 5.5224896668202765, "learning_rate": 8.481034833520776e-06, "loss": 17.1137, "step": 15167 }, { "epoch": 0.2772588517008792, "grad_norm": 5.863051389843126, "learning_rate": 8.480822338169468e-06, "loss": 17.2692, "step": 15168 }, { "epoch": 0.27727713089732575, "grad_norm": 6.441945504782519, "learning_rate": 8.48060983061818e-06, "loss": 17.6335, "step": 15169 }, { "epoch": 0.2772954100937723, "grad_norm": 6.34402376146172, "learning_rate": 8.480397310867657e-06, "loss": 17.4571, "step": 15170 }, { "epoch": 0.2773136892902188, "grad_norm": 8.353750088079025, "learning_rate": 8.480184778918644e-06, "loss": 18.0318, "step": 15171 }, { "epoch": 0.2773319684866653, "grad_norm": 7.132669222601691, "learning_rate": 8.479972234771883e-06, "loss": 17.8458, "step": 15172 }, { "epoch": 0.27735024768311184, "grad_norm": 6.210608983505865, "learning_rate": 8.479759678428123e-06, "loss": 17.5225, "step": 15173 }, { "epoch": 0.2773685268795584, "grad_norm": 6.017708530134503, "learning_rate": 8.479547109888108e-06, "loss": 17.2827, "step": 15174 }, { "epoch": 0.2773868060760049, "grad_norm": 7.086861207018966, "learning_rate": 8.479334529152582e-06, "loss": 17.6825, "step": 15175 }, { "epoch": 0.27740508527245145, "grad_norm": 6.989417468633682, "learning_rate": 8.479121936222288e-06, "loss": 17.7719, "step": 15176 }, { "epoch": 0.2774233644688979, "grad_norm": 6.034472674549152, "learning_rate": 8.478909331097975e-06, "loss": 17.4463, "step": 15177 }, { "epoch": 0.27744164366534446, "grad_norm": 6.788649532752917, "learning_rate": 8.478696713780388e-06, "loss": 17.771, "step": 15178 }, { "epoch": 0.277459922861791, "grad_norm": 6.179746451369153, "learning_rate": 8.47848408427027e-06, "loss": 17.2697, "step": 15179 }, { "epoch": 0.27747820205823753, "grad_norm": 6.021264277558732, "learning_rate": 8.478271442568366e-06, "loss": 17.1685, "step": 15180 }, { "epoch": 0.27749648125468407, "grad_norm": 6.211870963934204, "learning_rate": 8.478058788675424e-06, "loss": 17.2924, "step": 15181 }, { "epoch": 0.27751476045113055, "grad_norm": 6.08344944737126, "learning_rate": 8.477846122592188e-06, "loss": 17.3834, "step": 15182 }, { "epoch": 0.2775330396475771, "grad_norm": 7.322540912423537, "learning_rate": 8.477633444319401e-06, "loss": 17.6685, "step": 15183 }, { "epoch": 0.2775513188440236, "grad_norm": 6.987847075776526, "learning_rate": 8.477420753857813e-06, "loss": 17.7501, "step": 15184 }, { "epoch": 0.27756959804047016, "grad_norm": 7.145721719355926, "learning_rate": 8.477208051208166e-06, "loss": 17.8193, "step": 15185 }, { "epoch": 0.2775878772369167, "grad_norm": 7.465490201658691, "learning_rate": 8.476995336371207e-06, "loss": 17.9038, "step": 15186 }, { "epoch": 0.27760615643336317, "grad_norm": 6.450686128331409, "learning_rate": 8.47678260934768e-06, "loss": 17.7545, "step": 15187 }, { "epoch": 0.2776244356298097, "grad_norm": 5.831032254584482, "learning_rate": 8.476569870138332e-06, "loss": 17.2206, "step": 15188 }, { "epoch": 0.27764271482625624, "grad_norm": 6.9563310533709055, "learning_rate": 8.476357118743909e-06, "loss": 17.5481, "step": 15189 }, { "epoch": 0.2776609940227028, "grad_norm": 7.393013819431112, "learning_rate": 8.476144355165154e-06, "loss": 17.7355, "step": 15190 }, { "epoch": 0.27767927321914926, "grad_norm": 6.801743686131153, "learning_rate": 8.475931579402816e-06, "loss": 17.6852, "step": 15191 }, { "epoch": 0.2776975524155958, "grad_norm": 6.118024024183996, "learning_rate": 8.475718791457642e-06, "loss": 17.3726, "step": 15192 }, { "epoch": 0.27771583161204233, "grad_norm": 6.365383240770513, "learning_rate": 8.47550599133037e-06, "loss": 17.4786, "step": 15193 }, { "epoch": 0.27773411080848887, "grad_norm": 7.625483733167843, "learning_rate": 8.475293179021756e-06, "loss": 18.2475, "step": 15194 }, { "epoch": 0.2777523900049354, "grad_norm": 5.163126788299081, "learning_rate": 8.475080354532538e-06, "loss": 17.2556, "step": 15195 }, { "epoch": 0.2777706692013819, "grad_norm": 6.736192556992801, "learning_rate": 8.474867517863466e-06, "loss": 17.5523, "step": 15196 }, { "epoch": 0.2777889483978284, "grad_norm": 6.561879531359489, "learning_rate": 8.474654669015285e-06, "loss": 17.5083, "step": 15197 }, { "epoch": 0.27780722759427495, "grad_norm": 6.758920266786231, "learning_rate": 8.47444180798874e-06, "loss": 17.5761, "step": 15198 }, { "epoch": 0.2778255067907215, "grad_norm": 6.142855284877659, "learning_rate": 8.474228934784579e-06, "loss": 17.2216, "step": 15199 }, { "epoch": 0.277843785987168, "grad_norm": 6.649292497145365, "learning_rate": 8.474016049403547e-06, "loss": 17.4655, "step": 15200 }, { "epoch": 0.2778620651836145, "grad_norm": 5.50710130195196, "learning_rate": 8.473803151846389e-06, "loss": 17.1131, "step": 15201 }, { "epoch": 0.27788034438006104, "grad_norm": 5.051523217599611, "learning_rate": 8.473590242113854e-06, "loss": 16.9992, "step": 15202 }, { "epoch": 0.2778986235765076, "grad_norm": 7.419341418076164, "learning_rate": 8.473377320206685e-06, "loss": 18.1681, "step": 15203 }, { "epoch": 0.2779169027729541, "grad_norm": 6.583411215133172, "learning_rate": 8.47316438612563e-06, "loss": 17.6993, "step": 15204 }, { "epoch": 0.27793518196940065, "grad_norm": 10.621010132856448, "learning_rate": 8.472951439871437e-06, "loss": 17.979, "step": 15205 }, { "epoch": 0.2779534611658471, "grad_norm": 6.9761154458200485, "learning_rate": 8.472738481444849e-06, "loss": 17.7062, "step": 15206 }, { "epoch": 0.27797174036229366, "grad_norm": 8.09995108118209, "learning_rate": 8.472525510846615e-06, "loss": 18.1606, "step": 15207 }, { "epoch": 0.2779900195587402, "grad_norm": 6.237726067150974, "learning_rate": 8.472312528077478e-06, "loss": 17.2962, "step": 15208 }, { "epoch": 0.27800829875518673, "grad_norm": 8.064495181918465, "learning_rate": 8.472099533138189e-06, "loss": 18.4658, "step": 15209 }, { "epoch": 0.27802657795163327, "grad_norm": 5.936813021744895, "learning_rate": 8.471886526029494e-06, "loss": 17.1052, "step": 15210 }, { "epoch": 0.27804485714807975, "grad_norm": 6.116096489328429, "learning_rate": 8.471673506752135e-06, "loss": 17.3807, "step": 15211 }, { "epoch": 0.2780631363445263, "grad_norm": 6.622652093645998, "learning_rate": 8.471460475306862e-06, "loss": 17.6376, "step": 15212 }, { "epoch": 0.2780814155409728, "grad_norm": 6.51173487662506, "learning_rate": 8.471247431694422e-06, "loss": 17.6722, "step": 15213 }, { "epoch": 0.27809969473741936, "grad_norm": 6.6349494648415925, "learning_rate": 8.471034375915562e-06, "loss": 17.2563, "step": 15214 }, { "epoch": 0.2781179739338659, "grad_norm": 6.05963094836774, "learning_rate": 8.470821307971026e-06, "loss": 17.3188, "step": 15215 }, { "epoch": 0.27813625313031237, "grad_norm": 7.724124385030072, "learning_rate": 8.470608227861565e-06, "loss": 17.6223, "step": 15216 }, { "epoch": 0.2781545323267589, "grad_norm": 7.930717661901581, "learning_rate": 8.47039513558792e-06, "loss": 18.4473, "step": 15217 }, { "epoch": 0.27817281152320544, "grad_norm": 6.870144256274125, "learning_rate": 8.470182031150843e-06, "loss": 17.8599, "step": 15218 }, { "epoch": 0.278191090719652, "grad_norm": 6.339470043941988, "learning_rate": 8.46996891455108e-06, "loss": 17.6292, "step": 15219 }, { "epoch": 0.2782093699160985, "grad_norm": 6.876491826935089, "learning_rate": 8.469755785789375e-06, "loss": 17.8334, "step": 15220 }, { "epoch": 0.278227649112545, "grad_norm": 8.450256509774235, "learning_rate": 8.469542644866478e-06, "loss": 18.408, "step": 15221 }, { "epoch": 0.27824592830899153, "grad_norm": 7.71338462243798, "learning_rate": 8.469329491783136e-06, "loss": 17.814, "step": 15222 }, { "epoch": 0.27826420750543807, "grad_norm": 6.619253118741429, "learning_rate": 8.469116326540094e-06, "loss": 17.4123, "step": 15223 }, { "epoch": 0.2782824867018846, "grad_norm": 8.064322766637254, "learning_rate": 8.468903149138102e-06, "loss": 18.4396, "step": 15224 }, { "epoch": 0.2783007658983311, "grad_norm": 7.097520508723367, "learning_rate": 8.468689959577902e-06, "loss": 17.9868, "step": 15225 }, { "epoch": 0.2783190450947776, "grad_norm": 6.357331624950789, "learning_rate": 8.468476757860247e-06, "loss": 17.1258, "step": 15226 }, { "epoch": 0.27833732429122415, "grad_norm": 6.795499416133291, "learning_rate": 8.468263543985882e-06, "loss": 18.116, "step": 15227 }, { "epoch": 0.2783556034876707, "grad_norm": 5.91248706771861, "learning_rate": 8.468050317955554e-06, "loss": 17.3861, "step": 15228 }, { "epoch": 0.2783738826841172, "grad_norm": 6.875592199296723, "learning_rate": 8.467837079770012e-06, "loss": 17.7825, "step": 15229 }, { "epoch": 0.2783921618805637, "grad_norm": 7.0886399327734475, "learning_rate": 8.46762382943e-06, "loss": 17.9897, "step": 15230 }, { "epoch": 0.27841044107701024, "grad_norm": 6.236725079378205, "learning_rate": 8.467410566936267e-06, "loss": 17.7535, "step": 15231 }, { "epoch": 0.2784287202734568, "grad_norm": 7.666071532080812, "learning_rate": 8.467197292289562e-06, "loss": 18.0279, "step": 15232 }, { "epoch": 0.2784469994699033, "grad_norm": 6.587910759013121, "learning_rate": 8.46698400549063e-06, "loss": 17.2562, "step": 15233 }, { "epoch": 0.27846527866634985, "grad_norm": 6.868167550343062, "learning_rate": 8.466770706540222e-06, "loss": 17.8229, "step": 15234 }, { "epoch": 0.2784835578627963, "grad_norm": 5.841143692922496, "learning_rate": 8.466557395439083e-06, "loss": 17.2415, "step": 15235 }, { "epoch": 0.27850183705924286, "grad_norm": 6.171709736836082, "learning_rate": 8.46634407218796e-06, "loss": 17.4398, "step": 15236 }, { "epoch": 0.2785201162556894, "grad_norm": 8.425015007776755, "learning_rate": 8.466130736787603e-06, "loss": 18.005, "step": 15237 }, { "epoch": 0.27853839545213593, "grad_norm": 6.6812126239316125, "learning_rate": 8.465917389238757e-06, "loss": 17.6213, "step": 15238 }, { "epoch": 0.27855667464858247, "grad_norm": 5.946752238077004, "learning_rate": 8.465704029542173e-06, "loss": 17.321, "step": 15239 }, { "epoch": 0.27857495384502895, "grad_norm": 6.8249810466527405, "learning_rate": 8.465490657698596e-06, "loss": 17.6091, "step": 15240 }, { "epoch": 0.2785932330414755, "grad_norm": 6.257116990467253, "learning_rate": 8.465277273708777e-06, "loss": 17.5928, "step": 15241 }, { "epoch": 0.278611512237922, "grad_norm": 8.377971703319272, "learning_rate": 8.465063877573459e-06, "loss": 18.2086, "step": 15242 }, { "epoch": 0.27862979143436856, "grad_norm": 6.373933554991297, "learning_rate": 8.464850469293396e-06, "loss": 17.3858, "step": 15243 }, { "epoch": 0.2786480706308151, "grad_norm": 6.426640808081779, "learning_rate": 8.46463704886933e-06, "loss": 17.4808, "step": 15244 }, { "epoch": 0.2786663498272616, "grad_norm": 6.208908698906014, "learning_rate": 8.464423616302012e-06, "loss": 17.2587, "step": 15245 }, { "epoch": 0.2786846290237081, "grad_norm": 6.426478401626894, "learning_rate": 8.46421017159219e-06, "loss": 17.3913, "step": 15246 }, { "epoch": 0.27870290822015464, "grad_norm": 6.509789723463751, "learning_rate": 8.463996714740614e-06, "loss": 17.543, "step": 15247 }, { "epoch": 0.2787211874166012, "grad_norm": 6.309781055249573, "learning_rate": 8.463783245748028e-06, "loss": 17.9775, "step": 15248 }, { "epoch": 0.2787394666130477, "grad_norm": 7.551419698529556, "learning_rate": 8.463569764615183e-06, "loss": 18.1277, "step": 15249 }, { "epoch": 0.2787577458094942, "grad_norm": 5.969849642908827, "learning_rate": 8.463356271342826e-06, "loss": 17.3494, "step": 15250 }, { "epoch": 0.27877602500594073, "grad_norm": 6.701235421654121, "learning_rate": 8.463142765931706e-06, "loss": 17.9205, "step": 15251 }, { "epoch": 0.27879430420238727, "grad_norm": 5.386231186161452, "learning_rate": 8.462929248382572e-06, "loss": 17.265, "step": 15252 }, { "epoch": 0.2788125833988338, "grad_norm": 7.491369075230487, "learning_rate": 8.46271571869617e-06, "loss": 17.4947, "step": 15253 }, { "epoch": 0.27883086259528034, "grad_norm": 6.546424260274679, "learning_rate": 8.46250217687325e-06, "loss": 17.5241, "step": 15254 }, { "epoch": 0.2788491417917268, "grad_norm": 5.975339806240297, "learning_rate": 8.462288622914563e-06, "loss": 17.4023, "step": 15255 }, { "epoch": 0.27886742098817335, "grad_norm": 5.741204871009067, "learning_rate": 8.462075056820852e-06, "loss": 17.3395, "step": 15256 }, { "epoch": 0.2788857001846199, "grad_norm": 6.986842947076972, "learning_rate": 8.46186147859287e-06, "loss": 17.5549, "step": 15257 }, { "epoch": 0.2789039793810664, "grad_norm": 8.520068982359538, "learning_rate": 8.461647888231363e-06, "loss": 18.7468, "step": 15258 }, { "epoch": 0.2789222585775129, "grad_norm": 5.132084312559286, "learning_rate": 8.46143428573708e-06, "loss": 16.9836, "step": 15259 }, { "epoch": 0.27894053777395944, "grad_norm": 6.018714877199563, "learning_rate": 8.46122067111077e-06, "loss": 17.1421, "step": 15260 }, { "epoch": 0.278958816970406, "grad_norm": 8.834433967022733, "learning_rate": 8.461007044353184e-06, "loss": 18.7856, "step": 15261 }, { "epoch": 0.2789770961668525, "grad_norm": 6.720650682562521, "learning_rate": 8.460793405465066e-06, "loss": 17.8257, "step": 15262 }, { "epoch": 0.27899537536329905, "grad_norm": 5.181558983727919, "learning_rate": 8.46057975444717e-06, "loss": 16.9849, "step": 15263 }, { "epoch": 0.2790136545597455, "grad_norm": 6.330977568142363, "learning_rate": 8.460366091300241e-06, "loss": 17.465, "step": 15264 }, { "epoch": 0.27903193375619206, "grad_norm": 7.0797762558721935, "learning_rate": 8.460152416025029e-06, "loss": 17.7323, "step": 15265 }, { "epoch": 0.2790502129526386, "grad_norm": 6.604603805911028, "learning_rate": 8.459938728622284e-06, "loss": 17.4136, "step": 15266 }, { "epoch": 0.27906849214908513, "grad_norm": 5.953423256831014, "learning_rate": 8.459725029092754e-06, "loss": 17.2549, "step": 15267 }, { "epoch": 0.27908677134553167, "grad_norm": 5.211822564212015, "learning_rate": 8.459511317437185e-06, "loss": 17.1655, "step": 15268 }, { "epoch": 0.27910505054197815, "grad_norm": 7.650219297859471, "learning_rate": 8.459297593656333e-06, "loss": 18.0305, "step": 15269 }, { "epoch": 0.2791233297384247, "grad_norm": 5.4568084959639025, "learning_rate": 8.45908385775094e-06, "loss": 17.2651, "step": 15270 }, { "epoch": 0.2791416089348712, "grad_norm": 6.108865668184069, "learning_rate": 8.45887010972176e-06, "loss": 17.3542, "step": 15271 }, { "epoch": 0.27915988813131776, "grad_norm": 7.042935463620705, "learning_rate": 8.45865634956954e-06, "loss": 17.7592, "step": 15272 }, { "epoch": 0.2791781673277643, "grad_norm": 7.0895414363913165, "learning_rate": 8.45844257729503e-06, "loss": 17.6137, "step": 15273 }, { "epoch": 0.2791964465242108, "grad_norm": 5.97027588161131, "learning_rate": 8.45822879289898e-06, "loss": 17.2089, "step": 15274 }, { "epoch": 0.2792147257206573, "grad_norm": 6.328044998520751, "learning_rate": 8.458014996382136e-06, "loss": 17.2472, "step": 15275 }, { "epoch": 0.27923300491710384, "grad_norm": 8.500126130847903, "learning_rate": 8.457801187745249e-06, "loss": 18.5316, "step": 15276 }, { "epoch": 0.2792512841135504, "grad_norm": 5.9999936008840296, "learning_rate": 8.45758736698907e-06, "loss": 17.2843, "step": 15277 }, { "epoch": 0.2792695633099969, "grad_norm": 5.768989858580754, "learning_rate": 8.457373534114347e-06, "loss": 17.23, "step": 15278 }, { "epoch": 0.2792878425064434, "grad_norm": 5.676785352440452, "learning_rate": 8.457159689121832e-06, "loss": 17.2136, "step": 15279 }, { "epoch": 0.27930612170288993, "grad_norm": 5.623070110265875, "learning_rate": 8.45694583201227e-06, "loss": 17.233, "step": 15280 }, { "epoch": 0.27932440089933647, "grad_norm": 5.297347293676569, "learning_rate": 8.456731962786413e-06, "loss": 16.9945, "step": 15281 }, { "epoch": 0.279342680095783, "grad_norm": 5.718687791115308, "learning_rate": 8.456518081445012e-06, "loss": 17.4141, "step": 15282 }, { "epoch": 0.27936095929222954, "grad_norm": 7.002780859931119, "learning_rate": 8.456304187988813e-06, "loss": 17.5433, "step": 15283 }, { "epoch": 0.279379238488676, "grad_norm": 4.166955275633923, "learning_rate": 8.456090282418567e-06, "loss": 16.6323, "step": 15284 }, { "epoch": 0.27939751768512255, "grad_norm": 6.373873436434632, "learning_rate": 8.455876364735029e-06, "loss": 17.451, "step": 15285 }, { "epoch": 0.2794157968815691, "grad_norm": 6.079561982362909, "learning_rate": 8.45566243493894e-06, "loss": 17.389, "step": 15286 }, { "epoch": 0.2794340760780156, "grad_norm": 6.948018820344659, "learning_rate": 8.455448493031055e-06, "loss": 17.5749, "step": 15287 }, { "epoch": 0.27945235527446216, "grad_norm": 5.979548652802669, "learning_rate": 8.455234539012124e-06, "loss": 17.3088, "step": 15288 }, { "epoch": 0.27947063447090864, "grad_norm": 6.235225684157927, "learning_rate": 8.455020572882895e-06, "loss": 17.519, "step": 15289 }, { "epoch": 0.2794889136673552, "grad_norm": 5.57882309167049, "learning_rate": 8.454806594644118e-06, "loss": 16.9601, "step": 15290 }, { "epoch": 0.2795071928638017, "grad_norm": 8.709692939224485, "learning_rate": 8.454592604296544e-06, "loss": 18.4858, "step": 15291 }, { "epoch": 0.27952547206024825, "grad_norm": 5.684572081931391, "learning_rate": 8.454378601840924e-06, "loss": 17.139, "step": 15292 }, { "epoch": 0.27954375125669473, "grad_norm": 5.942677312137361, "learning_rate": 8.454164587278005e-06, "loss": 17.2136, "step": 15293 }, { "epoch": 0.27956203045314126, "grad_norm": 5.81287032481252, "learning_rate": 8.45395056060854e-06, "loss": 17.1906, "step": 15294 }, { "epoch": 0.2795803096495878, "grad_norm": 7.4421357602697, "learning_rate": 8.453736521833279e-06, "loss": 17.7026, "step": 15295 }, { "epoch": 0.27959858884603433, "grad_norm": 6.87095794038651, "learning_rate": 8.453522470952968e-06, "loss": 17.4585, "step": 15296 }, { "epoch": 0.27961686804248087, "grad_norm": 8.767742792641915, "learning_rate": 8.453308407968363e-06, "loss": 17.8881, "step": 15297 }, { "epoch": 0.27963514723892735, "grad_norm": 6.283689084853071, "learning_rate": 8.45309433288021e-06, "loss": 17.3557, "step": 15298 }, { "epoch": 0.2796534264353739, "grad_norm": 6.797083943751389, "learning_rate": 8.452880245689264e-06, "loss": 17.7693, "step": 15299 }, { "epoch": 0.2796717056318204, "grad_norm": 6.775824178656483, "learning_rate": 8.452666146396268e-06, "loss": 17.8483, "step": 15300 }, { "epoch": 0.27968998482826696, "grad_norm": 7.137447876229566, "learning_rate": 8.452452035001981e-06, "loss": 18.0176, "step": 15301 }, { "epoch": 0.2797082640247135, "grad_norm": 6.7099557814928055, "learning_rate": 8.452237911507147e-06, "loss": 17.6612, "step": 15302 }, { "epoch": 0.27972654322116, "grad_norm": 6.140429114030082, "learning_rate": 8.45202377591252e-06, "loss": 17.333, "step": 15303 }, { "epoch": 0.2797448224176065, "grad_norm": 5.7035182472633705, "learning_rate": 8.451809628218847e-06, "loss": 17.1685, "step": 15304 }, { "epoch": 0.27976310161405304, "grad_norm": 6.961475674515516, "learning_rate": 8.451595468426882e-06, "loss": 17.4019, "step": 15305 }, { "epoch": 0.2797813808104996, "grad_norm": 6.981320637514193, "learning_rate": 8.451381296537375e-06, "loss": 17.6227, "step": 15306 }, { "epoch": 0.2797996600069461, "grad_norm": 7.217081963757125, "learning_rate": 8.451167112551076e-06, "loss": 17.7034, "step": 15307 }, { "epoch": 0.2798179392033926, "grad_norm": 6.63198113376361, "learning_rate": 8.450952916468734e-06, "loss": 17.5213, "step": 15308 }, { "epoch": 0.27983621839983913, "grad_norm": 6.327382881871625, "learning_rate": 8.450738708291105e-06, "loss": 17.511, "step": 15309 }, { "epoch": 0.27985449759628567, "grad_norm": 6.813876586995132, "learning_rate": 8.450524488018933e-06, "loss": 17.7001, "step": 15310 }, { "epoch": 0.2798727767927322, "grad_norm": 6.38672657410487, "learning_rate": 8.450310255652972e-06, "loss": 17.3549, "step": 15311 }, { "epoch": 0.27989105598917874, "grad_norm": 6.390512941314512, "learning_rate": 8.450096011193975e-06, "loss": 17.6012, "step": 15312 }, { "epoch": 0.2799093351856252, "grad_norm": 5.632404099979596, "learning_rate": 8.44988175464269e-06, "loss": 16.9748, "step": 15313 }, { "epoch": 0.27992761438207175, "grad_norm": 7.337163444232834, "learning_rate": 8.449667485999868e-06, "loss": 17.5949, "step": 15314 }, { "epoch": 0.2799458935785183, "grad_norm": 6.18730547586552, "learning_rate": 8.449453205266262e-06, "loss": 17.6807, "step": 15315 }, { "epoch": 0.2799641727749648, "grad_norm": 7.3207355037896225, "learning_rate": 8.449238912442621e-06, "loss": 17.6556, "step": 15316 }, { "epoch": 0.27998245197141136, "grad_norm": 5.9872163701769265, "learning_rate": 8.449024607529696e-06, "loss": 17.3126, "step": 15317 }, { "epoch": 0.28000073116785784, "grad_norm": 5.858221998433997, "learning_rate": 8.448810290528242e-06, "loss": 17.4937, "step": 15318 }, { "epoch": 0.2800190103643044, "grad_norm": 6.761771220793409, "learning_rate": 8.448595961439006e-06, "loss": 17.6793, "step": 15319 }, { "epoch": 0.2800372895607509, "grad_norm": 5.870076405893251, "learning_rate": 8.44838162026274e-06, "loss": 17.0499, "step": 15320 }, { "epoch": 0.28005556875719745, "grad_norm": 7.916559103623928, "learning_rate": 8.448167267000195e-06, "loss": 18.4175, "step": 15321 }, { "epoch": 0.280073847953644, "grad_norm": 6.791354085807862, "learning_rate": 8.447952901652123e-06, "loss": 17.4933, "step": 15322 }, { "epoch": 0.28009212715009046, "grad_norm": 7.122444690494139, "learning_rate": 8.447738524219277e-06, "loss": 18.0934, "step": 15323 }, { "epoch": 0.280110406346537, "grad_norm": 5.3402499262137155, "learning_rate": 8.447524134702405e-06, "loss": 17.1074, "step": 15324 }, { "epoch": 0.28012868554298354, "grad_norm": 6.198544112878502, "learning_rate": 8.44730973310226e-06, "loss": 17.4524, "step": 15325 }, { "epoch": 0.28014696473943007, "grad_norm": 7.0907625758521675, "learning_rate": 8.447095319419594e-06, "loss": 17.8427, "step": 15326 }, { "epoch": 0.28016524393587655, "grad_norm": 5.831380913616366, "learning_rate": 8.446880893655158e-06, "loss": 17.3841, "step": 15327 }, { "epoch": 0.2801835231323231, "grad_norm": 7.4686457544120275, "learning_rate": 8.446666455809705e-06, "loss": 17.7447, "step": 15328 }, { "epoch": 0.2802018023287696, "grad_norm": 6.541385684027912, "learning_rate": 8.446452005883982e-06, "loss": 17.8826, "step": 15329 }, { "epoch": 0.28022008152521616, "grad_norm": 6.4541582637804, "learning_rate": 8.446237543878748e-06, "loss": 17.5701, "step": 15330 }, { "epoch": 0.2802383607216627, "grad_norm": 6.377299000804159, "learning_rate": 8.446023069794747e-06, "loss": 17.5911, "step": 15331 }, { "epoch": 0.2802566399181092, "grad_norm": 6.58921984015208, "learning_rate": 8.445808583632734e-06, "loss": 17.7445, "step": 15332 }, { "epoch": 0.2802749191145557, "grad_norm": 7.439213185690679, "learning_rate": 8.445594085393463e-06, "loss": 17.6031, "step": 15333 }, { "epoch": 0.28029319831100225, "grad_norm": 6.227352568603799, "learning_rate": 8.445379575077683e-06, "loss": 17.3409, "step": 15334 }, { "epoch": 0.2803114775074488, "grad_norm": 8.549842256884657, "learning_rate": 8.445165052686147e-06, "loss": 18.3006, "step": 15335 }, { "epoch": 0.2803297567038953, "grad_norm": 7.032400686381592, "learning_rate": 8.444950518219605e-06, "loss": 17.9843, "step": 15336 }, { "epoch": 0.2803480359003418, "grad_norm": 6.67063177388569, "learning_rate": 8.444735971678812e-06, "loss": 17.4586, "step": 15337 }, { "epoch": 0.28036631509678833, "grad_norm": 7.207074160898403, "learning_rate": 8.444521413064517e-06, "loss": 17.9207, "step": 15338 }, { "epoch": 0.28038459429323487, "grad_norm": 5.3906090184452085, "learning_rate": 8.444306842377474e-06, "loss": 16.894, "step": 15339 }, { "epoch": 0.2804028734896814, "grad_norm": 8.653548263979046, "learning_rate": 8.444092259618435e-06, "loss": 18.2815, "step": 15340 }, { "epoch": 0.28042115268612794, "grad_norm": 6.18004532586676, "learning_rate": 8.443877664788151e-06, "loss": 17.6296, "step": 15341 }, { "epoch": 0.2804394318825744, "grad_norm": 8.501764140211932, "learning_rate": 8.443663057887374e-06, "loss": 17.7249, "step": 15342 }, { "epoch": 0.28045771107902095, "grad_norm": 5.7218125891780325, "learning_rate": 8.44344843891686e-06, "loss": 16.9147, "step": 15343 }, { "epoch": 0.2804759902754675, "grad_norm": 5.913127454267365, "learning_rate": 8.443233807877353e-06, "loss": 17.2629, "step": 15344 }, { "epoch": 0.280494269471914, "grad_norm": 6.820627725264064, "learning_rate": 8.443019164769613e-06, "loss": 17.5698, "step": 15345 }, { "epoch": 0.28051254866836056, "grad_norm": 6.022789544760202, "learning_rate": 8.442804509594388e-06, "loss": 17.3107, "step": 15346 }, { "epoch": 0.28053082786480704, "grad_norm": 8.94514036680201, "learning_rate": 8.442589842352436e-06, "loss": 18.5076, "step": 15347 }, { "epoch": 0.2805491070612536, "grad_norm": 6.1553618854905485, "learning_rate": 8.442375163044502e-06, "loss": 17.2621, "step": 15348 }, { "epoch": 0.2805673862577001, "grad_norm": 5.815419277840977, "learning_rate": 8.442160471671341e-06, "loss": 17.3209, "step": 15349 }, { "epoch": 0.28058566545414665, "grad_norm": 7.022413121021528, "learning_rate": 8.441945768233709e-06, "loss": 17.7672, "step": 15350 }, { "epoch": 0.2806039446505932, "grad_norm": 6.314723020154745, "learning_rate": 8.441731052732354e-06, "loss": 17.2963, "step": 15351 }, { "epoch": 0.28062222384703966, "grad_norm": 6.0191116234295645, "learning_rate": 8.44151632516803e-06, "loss": 17.4075, "step": 15352 }, { "epoch": 0.2806405030434862, "grad_norm": 5.652747273261852, "learning_rate": 8.44130158554149e-06, "loss": 17.2558, "step": 15353 }, { "epoch": 0.28065878223993274, "grad_norm": 7.9446397636589765, "learning_rate": 8.441086833853489e-06, "loss": 17.9289, "step": 15354 }, { "epoch": 0.28067706143637927, "grad_norm": 7.053167863551996, "learning_rate": 8.440872070104776e-06, "loss": 17.8498, "step": 15355 }, { "epoch": 0.2806953406328258, "grad_norm": 7.827079397809964, "learning_rate": 8.440657294296103e-06, "loss": 17.8238, "step": 15356 }, { "epoch": 0.2807136198292723, "grad_norm": 6.41107055789248, "learning_rate": 8.440442506428225e-06, "loss": 17.5368, "step": 15357 }, { "epoch": 0.2807318990257188, "grad_norm": 6.278500525959484, "learning_rate": 8.440227706501897e-06, "loss": 17.7145, "step": 15358 }, { "epoch": 0.28075017822216536, "grad_norm": 7.4986718163922745, "learning_rate": 8.440012894517868e-06, "loss": 17.7521, "step": 15359 }, { "epoch": 0.2807684574186119, "grad_norm": 6.184931507062844, "learning_rate": 8.439798070476891e-06, "loss": 17.5835, "step": 15360 }, { "epoch": 0.2807867366150584, "grad_norm": 6.681625724115947, "learning_rate": 8.439583234379722e-06, "loss": 17.6689, "step": 15361 }, { "epoch": 0.2808050158115049, "grad_norm": 5.35760730682152, "learning_rate": 8.439368386227114e-06, "loss": 16.8947, "step": 15362 }, { "epoch": 0.28082329500795145, "grad_norm": 6.548154694259325, "learning_rate": 8.439153526019814e-06, "loss": 17.3629, "step": 15363 }, { "epoch": 0.280841574204398, "grad_norm": 5.17716918435155, "learning_rate": 8.438938653758583e-06, "loss": 16.9253, "step": 15364 }, { "epoch": 0.2808598534008445, "grad_norm": 6.725656835076324, "learning_rate": 8.438723769444167e-06, "loss": 17.6387, "step": 15365 }, { "epoch": 0.280878132597291, "grad_norm": 7.6716436303256605, "learning_rate": 8.438508873077326e-06, "loss": 17.9415, "step": 15366 }, { "epoch": 0.28089641179373753, "grad_norm": 6.062060340168047, "learning_rate": 8.438293964658808e-06, "loss": 17.4276, "step": 15367 }, { "epoch": 0.28091469099018407, "grad_norm": 6.5146708472749, "learning_rate": 8.438079044189369e-06, "loss": 17.4647, "step": 15368 }, { "epoch": 0.2809329701866306, "grad_norm": 6.356540002782304, "learning_rate": 8.43786411166976e-06, "loss": 17.5814, "step": 15369 }, { "epoch": 0.28095124938307714, "grad_norm": 7.0890911572255995, "learning_rate": 8.437649167100736e-06, "loss": 18.2637, "step": 15370 }, { "epoch": 0.2809695285795236, "grad_norm": 6.842492378927014, "learning_rate": 8.43743421048305e-06, "loss": 17.8266, "step": 15371 }, { "epoch": 0.28098780777597016, "grad_norm": 7.198672708326323, "learning_rate": 8.437219241817456e-06, "loss": 18.1389, "step": 15372 }, { "epoch": 0.2810060869724167, "grad_norm": 7.038255403981865, "learning_rate": 8.437004261104706e-06, "loss": 17.7662, "step": 15373 }, { "epoch": 0.2810243661688632, "grad_norm": 7.696241643414113, "learning_rate": 8.436789268345555e-06, "loss": 17.8438, "step": 15374 }, { "epoch": 0.28104264536530976, "grad_norm": 6.362290596275576, "learning_rate": 8.436574263540756e-06, "loss": 17.4141, "step": 15375 }, { "epoch": 0.28106092456175624, "grad_norm": 6.772775790329577, "learning_rate": 8.436359246691062e-06, "loss": 17.8197, "step": 15376 }, { "epoch": 0.2810792037582028, "grad_norm": 6.125749331631318, "learning_rate": 8.436144217797227e-06, "loss": 17.3761, "step": 15377 }, { "epoch": 0.2810974829546493, "grad_norm": 7.208241423222614, "learning_rate": 8.435929176860004e-06, "loss": 18.3448, "step": 15378 }, { "epoch": 0.28111576215109585, "grad_norm": 7.937853810954681, "learning_rate": 8.435714123880149e-06, "loss": 17.792, "step": 15379 }, { "epoch": 0.2811340413475424, "grad_norm": 6.1100040916416605, "learning_rate": 8.435499058858413e-06, "loss": 17.5025, "step": 15380 }, { "epoch": 0.28115232054398887, "grad_norm": 8.344809145928709, "learning_rate": 8.435283981795551e-06, "loss": 18.2036, "step": 15381 }, { "epoch": 0.2811705997404354, "grad_norm": 7.70988367137395, "learning_rate": 8.435068892692317e-06, "loss": 18.2193, "step": 15382 }, { "epoch": 0.28118887893688194, "grad_norm": 6.554828258743084, "learning_rate": 8.434853791549464e-06, "loss": 17.4626, "step": 15383 }, { "epoch": 0.28120715813332847, "grad_norm": 5.832048397802159, "learning_rate": 8.434638678367747e-06, "loss": 17.0833, "step": 15384 }, { "epoch": 0.281225437329775, "grad_norm": 6.280671045705303, "learning_rate": 8.43442355314792e-06, "loss": 17.5083, "step": 15385 }, { "epoch": 0.2812437165262215, "grad_norm": 7.426942848885526, "learning_rate": 8.434208415890738e-06, "loss": 18.0593, "step": 15386 }, { "epoch": 0.281261995722668, "grad_norm": 6.082815319521803, "learning_rate": 8.433993266596949e-06, "loss": 17.0419, "step": 15387 }, { "epoch": 0.28128027491911456, "grad_norm": 15.46209403251262, "learning_rate": 8.433778105267314e-06, "loss": 18.4867, "step": 15388 }, { "epoch": 0.2812985541155611, "grad_norm": 7.5902355214347255, "learning_rate": 8.433562931902585e-06, "loss": 17.9188, "step": 15389 }, { "epoch": 0.28131683331200763, "grad_norm": 6.61907768246289, "learning_rate": 8.433347746503516e-06, "loss": 17.4187, "step": 15390 }, { "epoch": 0.2813351125084541, "grad_norm": 5.5708106598423965, "learning_rate": 8.433132549070861e-06, "loss": 17.286, "step": 15391 }, { "epoch": 0.28135339170490065, "grad_norm": 7.01446519671279, "learning_rate": 8.432917339605375e-06, "loss": 17.9308, "step": 15392 }, { "epoch": 0.2813716709013472, "grad_norm": 7.418061223298455, "learning_rate": 8.43270211810781e-06, "loss": 17.9232, "step": 15393 }, { "epoch": 0.2813899500977937, "grad_norm": 6.169425131779138, "learning_rate": 8.432486884578922e-06, "loss": 17.5341, "step": 15394 }, { "epoch": 0.2814082292942402, "grad_norm": 7.663989986517547, "learning_rate": 8.432271639019466e-06, "loss": 17.9359, "step": 15395 }, { "epoch": 0.28142650849068673, "grad_norm": 6.417772884984047, "learning_rate": 8.432056381430196e-06, "loss": 17.4621, "step": 15396 }, { "epoch": 0.28144478768713327, "grad_norm": 7.620574417470514, "learning_rate": 8.431841111811864e-06, "loss": 17.4967, "step": 15397 }, { "epoch": 0.2814630668835798, "grad_norm": 5.205424429005494, "learning_rate": 8.43162583016523e-06, "loss": 16.956, "step": 15398 }, { "epoch": 0.28148134608002634, "grad_norm": 9.94632121770399, "learning_rate": 8.431410536491044e-06, "loss": 18.3539, "step": 15399 }, { "epoch": 0.2814996252764728, "grad_norm": 6.594947742304105, "learning_rate": 8.431195230790062e-06, "loss": 17.7453, "step": 15400 }, { "epoch": 0.28151790447291936, "grad_norm": 5.987288957161305, "learning_rate": 8.430979913063038e-06, "loss": 17.1959, "step": 15401 }, { "epoch": 0.2815361836693659, "grad_norm": 7.359071074054522, "learning_rate": 8.430764583310727e-06, "loss": 17.8892, "step": 15402 }, { "epoch": 0.2815544628658124, "grad_norm": 6.409411253387844, "learning_rate": 8.430549241533885e-06, "loss": 17.3761, "step": 15403 }, { "epoch": 0.28157274206225896, "grad_norm": 6.850570345828789, "learning_rate": 8.430333887733265e-06, "loss": 17.7419, "step": 15404 }, { "epoch": 0.28159102125870544, "grad_norm": 7.4295910040714634, "learning_rate": 8.430118521909623e-06, "loss": 18.2057, "step": 15405 }, { "epoch": 0.281609300455152, "grad_norm": 7.416582043114402, "learning_rate": 8.429903144063711e-06, "loss": 17.7305, "step": 15406 }, { "epoch": 0.2816275796515985, "grad_norm": 8.537220994678437, "learning_rate": 8.429687754196287e-06, "loss": 18.4273, "step": 15407 }, { "epoch": 0.28164585884804505, "grad_norm": 6.840629534012434, "learning_rate": 8.429472352308106e-06, "loss": 17.5493, "step": 15408 }, { "epoch": 0.2816641380444916, "grad_norm": 7.47801949131773, "learning_rate": 8.429256938399922e-06, "loss": 17.9558, "step": 15409 }, { "epoch": 0.28168241724093807, "grad_norm": 6.860487889768455, "learning_rate": 8.42904151247249e-06, "loss": 17.5536, "step": 15410 }, { "epoch": 0.2817006964373846, "grad_norm": 7.406907383950079, "learning_rate": 8.428826074526564e-06, "loss": 18.189, "step": 15411 }, { "epoch": 0.28171897563383114, "grad_norm": 6.839722894076168, "learning_rate": 8.428610624562901e-06, "loss": 17.9266, "step": 15412 }, { "epoch": 0.2817372548302777, "grad_norm": 6.394184142195576, "learning_rate": 8.428395162582255e-06, "loss": 17.7828, "step": 15413 }, { "epoch": 0.2817555340267242, "grad_norm": 6.365026512467458, "learning_rate": 8.428179688585381e-06, "loss": 17.3171, "step": 15414 }, { "epoch": 0.2817738132231707, "grad_norm": 6.239152923057427, "learning_rate": 8.427964202573035e-06, "loss": 17.1903, "step": 15415 }, { "epoch": 0.2817920924196172, "grad_norm": 6.800554911763028, "learning_rate": 8.427748704545973e-06, "loss": 17.5401, "step": 15416 }, { "epoch": 0.28181037161606376, "grad_norm": 6.606856572064669, "learning_rate": 8.427533194504947e-06, "loss": 17.622, "step": 15417 }, { "epoch": 0.2818286508125103, "grad_norm": 7.890341607230291, "learning_rate": 8.427317672450717e-06, "loss": 18.1035, "step": 15418 }, { "epoch": 0.28184693000895683, "grad_norm": 4.968144029529073, "learning_rate": 8.427102138384035e-06, "loss": 16.7538, "step": 15419 }, { "epoch": 0.2818652092054033, "grad_norm": 6.890989192427271, "learning_rate": 8.426886592305656e-06, "loss": 17.4269, "step": 15420 }, { "epoch": 0.28188348840184985, "grad_norm": 6.498491295188947, "learning_rate": 8.426671034216339e-06, "loss": 17.6894, "step": 15421 }, { "epoch": 0.2819017675982964, "grad_norm": 7.178414595108676, "learning_rate": 8.426455464116836e-06, "loss": 17.9236, "step": 15422 }, { "epoch": 0.2819200467947429, "grad_norm": 8.1254247136996, "learning_rate": 8.426239882007906e-06, "loss": 18.5536, "step": 15423 }, { "epoch": 0.28193832599118945, "grad_norm": 7.220048205570177, "learning_rate": 8.4260242878903e-06, "loss": 17.6249, "step": 15424 }, { "epoch": 0.28195660518763593, "grad_norm": 7.2183637462018515, "learning_rate": 8.425808681764776e-06, "loss": 17.3812, "step": 15425 }, { "epoch": 0.28197488438408247, "grad_norm": 6.614329901433477, "learning_rate": 8.425593063632092e-06, "loss": 17.5936, "step": 15426 }, { "epoch": 0.281993163580529, "grad_norm": 7.934813215507083, "learning_rate": 8.425377433493e-06, "loss": 17.8976, "step": 15427 }, { "epoch": 0.28201144277697554, "grad_norm": 7.4831568881450785, "learning_rate": 8.425161791348258e-06, "loss": 17.8004, "step": 15428 }, { "epoch": 0.282029721973422, "grad_norm": 6.702634420573207, "learning_rate": 8.424946137198621e-06, "loss": 17.7498, "step": 15429 }, { "epoch": 0.28204800116986856, "grad_norm": 6.774928563263164, "learning_rate": 8.424730471044845e-06, "loss": 17.8457, "step": 15430 }, { "epoch": 0.2820662803663151, "grad_norm": 6.819093186442741, "learning_rate": 8.424514792887686e-06, "loss": 17.7781, "step": 15431 }, { "epoch": 0.2820845595627616, "grad_norm": 5.697913517231326, "learning_rate": 8.424299102727899e-06, "loss": 17.3248, "step": 15432 }, { "epoch": 0.28210283875920816, "grad_norm": 7.531715739191498, "learning_rate": 8.424083400566243e-06, "loss": 17.9968, "step": 15433 }, { "epoch": 0.28212111795565464, "grad_norm": 6.673835150266609, "learning_rate": 8.42386768640347e-06, "loss": 17.8088, "step": 15434 }, { "epoch": 0.2821393971521012, "grad_norm": 5.569046211206729, "learning_rate": 8.423651960240339e-06, "loss": 17.2213, "step": 15435 }, { "epoch": 0.2821576763485477, "grad_norm": 6.4540470447502285, "learning_rate": 8.423436222077603e-06, "loss": 17.4253, "step": 15436 }, { "epoch": 0.28217595554499425, "grad_norm": 7.489503211144116, "learning_rate": 8.423220471916022e-06, "loss": 17.9254, "step": 15437 }, { "epoch": 0.2821942347414408, "grad_norm": 7.188916281016509, "learning_rate": 8.423004709756348e-06, "loss": 17.5623, "step": 15438 }, { "epoch": 0.28221251393788727, "grad_norm": 7.454564411197409, "learning_rate": 8.422788935599341e-06, "loss": 17.6314, "step": 15439 }, { "epoch": 0.2822307931343338, "grad_norm": 5.741675049568417, "learning_rate": 8.422573149445756e-06, "loss": 17.2068, "step": 15440 }, { "epoch": 0.28224907233078034, "grad_norm": 6.689218308835845, "learning_rate": 8.422357351296349e-06, "loss": 17.7591, "step": 15441 }, { "epoch": 0.2822673515272269, "grad_norm": 6.211700649871322, "learning_rate": 8.422141541151878e-06, "loss": 17.6763, "step": 15442 }, { "epoch": 0.2822856307236734, "grad_norm": 6.638265554748326, "learning_rate": 8.421925719013096e-06, "loss": 17.5908, "step": 15443 }, { "epoch": 0.2823039099201199, "grad_norm": 6.925772444016115, "learning_rate": 8.421709884880762e-06, "loss": 17.3167, "step": 15444 }, { "epoch": 0.2823221891165664, "grad_norm": 7.298203244186044, "learning_rate": 8.42149403875563e-06, "loss": 17.8008, "step": 15445 }, { "epoch": 0.28234046831301296, "grad_norm": 6.7739748624787195, "learning_rate": 8.42127818063846e-06, "loss": 17.886, "step": 15446 }, { "epoch": 0.2823587475094595, "grad_norm": 6.675542708786623, "learning_rate": 8.421062310530008e-06, "loss": 17.7136, "step": 15447 }, { "epoch": 0.28237702670590603, "grad_norm": 6.530552262727731, "learning_rate": 8.420846428431026e-06, "loss": 17.6029, "step": 15448 }, { "epoch": 0.2823953059023525, "grad_norm": 7.964362371597339, "learning_rate": 8.420630534342277e-06, "loss": 18.4993, "step": 15449 }, { "epoch": 0.28241358509879905, "grad_norm": 6.968830900129372, "learning_rate": 8.420414628264515e-06, "loss": 17.7445, "step": 15450 }, { "epoch": 0.2824318642952456, "grad_norm": 8.277947052614286, "learning_rate": 8.420198710198495e-06, "loss": 17.8483, "step": 15451 }, { "epoch": 0.2824501434916921, "grad_norm": 5.802198301028127, "learning_rate": 8.419982780144974e-06, "loss": 17.0768, "step": 15452 }, { "epoch": 0.28246842268813865, "grad_norm": 6.816259597711021, "learning_rate": 8.419766838104712e-06, "loss": 17.5801, "step": 15453 }, { "epoch": 0.28248670188458513, "grad_norm": 6.2967691082072035, "learning_rate": 8.419550884078465e-06, "loss": 17.6557, "step": 15454 }, { "epoch": 0.28250498108103167, "grad_norm": 6.258892931758237, "learning_rate": 8.419334918066987e-06, "loss": 17.4094, "step": 15455 }, { "epoch": 0.2825232602774782, "grad_norm": 6.862327542047562, "learning_rate": 8.419118940071039e-06, "loss": 17.9269, "step": 15456 }, { "epoch": 0.28254153947392474, "grad_norm": 6.718567381681014, "learning_rate": 8.418902950091374e-06, "loss": 17.5564, "step": 15457 }, { "epoch": 0.2825598186703713, "grad_norm": 6.717837238026571, "learning_rate": 8.418686948128752e-06, "loss": 17.4344, "step": 15458 }, { "epoch": 0.28257809786681776, "grad_norm": 11.766996616844157, "learning_rate": 8.418470934183927e-06, "loss": 18.5154, "step": 15459 }, { "epoch": 0.2825963770632643, "grad_norm": 5.102631544374815, "learning_rate": 8.418254908257659e-06, "loss": 16.8542, "step": 15460 }, { "epoch": 0.28261465625971083, "grad_norm": 6.080400088498884, "learning_rate": 8.418038870350706e-06, "loss": 17.3789, "step": 15461 }, { "epoch": 0.28263293545615736, "grad_norm": 6.188239214951652, "learning_rate": 8.417822820463822e-06, "loss": 17.4083, "step": 15462 }, { "epoch": 0.28265121465260384, "grad_norm": 5.514424312512443, "learning_rate": 8.417606758597765e-06, "loss": 17.0802, "step": 15463 }, { "epoch": 0.2826694938490504, "grad_norm": 5.558196896729641, "learning_rate": 8.417390684753292e-06, "loss": 17.2066, "step": 15464 }, { "epoch": 0.2826877730454969, "grad_norm": 5.907595343879084, "learning_rate": 8.417174598931163e-06, "loss": 17.2644, "step": 15465 }, { "epoch": 0.28270605224194345, "grad_norm": 6.63700062319701, "learning_rate": 8.416958501132133e-06, "loss": 17.699, "step": 15466 }, { "epoch": 0.28272433143839, "grad_norm": 5.6432284918911755, "learning_rate": 8.41674239135696e-06, "loss": 17.0312, "step": 15467 }, { "epoch": 0.28274261063483647, "grad_norm": 6.289393242218711, "learning_rate": 8.416526269606402e-06, "loss": 17.5631, "step": 15468 }, { "epoch": 0.282760889831283, "grad_norm": 7.5551513783786355, "learning_rate": 8.416310135881214e-06, "loss": 17.7653, "step": 15469 }, { "epoch": 0.28277916902772954, "grad_norm": 5.956620052584515, "learning_rate": 8.416093990182157e-06, "loss": 17.3785, "step": 15470 }, { "epoch": 0.2827974482241761, "grad_norm": 6.86001654615364, "learning_rate": 8.415877832509987e-06, "loss": 17.6636, "step": 15471 }, { "epoch": 0.2828157274206226, "grad_norm": 6.382719215755634, "learning_rate": 8.415661662865462e-06, "loss": 17.7263, "step": 15472 }, { "epoch": 0.2828340066170691, "grad_norm": 6.1246574539133585, "learning_rate": 8.415445481249339e-06, "loss": 17.2325, "step": 15473 }, { "epoch": 0.2828522858135156, "grad_norm": 7.390938265665142, "learning_rate": 8.415229287662375e-06, "loss": 17.7339, "step": 15474 }, { "epoch": 0.28287056500996216, "grad_norm": 5.215737196107322, "learning_rate": 8.415013082105328e-06, "loss": 17.035, "step": 15475 }, { "epoch": 0.2828888442064087, "grad_norm": 7.755466898336756, "learning_rate": 8.414796864578957e-06, "loss": 17.7883, "step": 15476 }, { "epoch": 0.28290712340285523, "grad_norm": 6.8293170026329255, "learning_rate": 8.41458063508402e-06, "loss": 17.6541, "step": 15477 }, { "epoch": 0.2829254025993017, "grad_norm": 6.517733841349658, "learning_rate": 8.414364393621274e-06, "loss": 17.5341, "step": 15478 }, { "epoch": 0.28294368179574825, "grad_norm": 5.862465874817472, "learning_rate": 8.414148140191479e-06, "loss": 17.1817, "step": 15479 }, { "epoch": 0.2829619609921948, "grad_norm": 7.381018863754203, "learning_rate": 8.41393187479539e-06, "loss": 17.7526, "step": 15480 }, { "epoch": 0.2829802401886413, "grad_norm": 6.833948647150858, "learning_rate": 8.413715597433764e-06, "loss": 17.6928, "step": 15481 }, { "epoch": 0.28299851938508785, "grad_norm": 6.521558147809372, "learning_rate": 8.413499308107363e-06, "loss": 17.5961, "step": 15482 }, { "epoch": 0.28301679858153433, "grad_norm": 5.946615365920805, "learning_rate": 8.413283006816943e-06, "loss": 17.3043, "step": 15483 }, { "epoch": 0.28303507777798087, "grad_norm": 6.485929179868215, "learning_rate": 8.413066693563262e-06, "loss": 17.5305, "step": 15484 }, { "epoch": 0.2830533569744274, "grad_norm": 5.945363387251339, "learning_rate": 8.412850368347077e-06, "loss": 17.0616, "step": 15485 }, { "epoch": 0.28307163617087394, "grad_norm": 6.54235154309566, "learning_rate": 8.412634031169148e-06, "loss": 17.3017, "step": 15486 }, { "epoch": 0.2830899153673205, "grad_norm": 5.686563129486015, "learning_rate": 8.412417682030234e-06, "loss": 17.4352, "step": 15487 }, { "epoch": 0.28310819456376696, "grad_norm": 6.595737961887192, "learning_rate": 8.412201320931092e-06, "loss": 17.5812, "step": 15488 }, { "epoch": 0.2831264737602135, "grad_norm": 6.398344498781888, "learning_rate": 8.41198494787248e-06, "loss": 17.2987, "step": 15489 }, { "epoch": 0.28314475295666003, "grad_norm": 6.557725214850864, "learning_rate": 8.411768562855157e-06, "loss": 17.7187, "step": 15490 }, { "epoch": 0.28316303215310656, "grad_norm": 6.50233786074878, "learning_rate": 8.411552165879881e-06, "loss": 17.3658, "step": 15491 }, { "epoch": 0.2831813113495531, "grad_norm": 6.1620822188746125, "learning_rate": 8.411335756947411e-06, "loss": 17.5341, "step": 15492 }, { "epoch": 0.2831995905459996, "grad_norm": 5.969493183820305, "learning_rate": 8.411119336058506e-06, "loss": 17.4539, "step": 15493 }, { "epoch": 0.2832178697424461, "grad_norm": 5.99854252879164, "learning_rate": 8.410902903213924e-06, "loss": 17.3071, "step": 15494 }, { "epoch": 0.28323614893889265, "grad_norm": 5.433585164652213, "learning_rate": 8.410686458414421e-06, "loss": 17.0368, "step": 15495 }, { "epoch": 0.2832544281353392, "grad_norm": 7.437621076301175, "learning_rate": 8.41047000166076e-06, "loss": 17.613, "step": 15496 }, { "epoch": 0.28327270733178567, "grad_norm": 6.106388244878, "learning_rate": 8.410253532953696e-06, "loss": 17.1693, "step": 15497 }, { "epoch": 0.2832909865282322, "grad_norm": 6.386802219835082, "learning_rate": 8.410037052293992e-06, "loss": 17.2761, "step": 15498 }, { "epoch": 0.28330926572467874, "grad_norm": 6.369372505903644, "learning_rate": 8.409820559682402e-06, "loss": 17.7474, "step": 15499 }, { "epoch": 0.2833275449211253, "grad_norm": 6.271390582198113, "learning_rate": 8.409604055119687e-06, "loss": 17.5237, "step": 15500 }, { "epoch": 0.2833458241175718, "grad_norm": 6.5256147398044835, "learning_rate": 8.409387538606605e-06, "loss": 17.1695, "step": 15501 }, { "epoch": 0.2833641033140183, "grad_norm": 5.786452002426799, "learning_rate": 8.409171010143916e-06, "loss": 17.4172, "step": 15502 }, { "epoch": 0.2833823825104648, "grad_norm": 8.84299267926821, "learning_rate": 8.40895446973238e-06, "loss": 18.0781, "step": 15503 }, { "epoch": 0.28340066170691136, "grad_norm": 5.691610922571614, "learning_rate": 8.408737917372751e-06, "loss": 17.104, "step": 15504 }, { "epoch": 0.2834189409033579, "grad_norm": 6.379030894126828, "learning_rate": 8.408521353065796e-06, "loss": 17.4691, "step": 15505 }, { "epoch": 0.28343722009980443, "grad_norm": 7.063629606285928, "learning_rate": 8.408304776812266e-06, "loss": 17.5241, "step": 15506 }, { "epoch": 0.2834554992962509, "grad_norm": 6.4461397749199545, "learning_rate": 8.408088188612923e-06, "loss": 17.4663, "step": 15507 }, { "epoch": 0.28347377849269745, "grad_norm": 7.239733713449572, "learning_rate": 8.407871588468527e-06, "loss": 17.5837, "step": 15508 }, { "epoch": 0.283492057689144, "grad_norm": 6.703860169477245, "learning_rate": 8.40765497637984e-06, "loss": 17.66, "step": 15509 }, { "epoch": 0.2835103368855905, "grad_norm": 6.8981920422858485, "learning_rate": 8.407438352347614e-06, "loss": 17.2924, "step": 15510 }, { "epoch": 0.28352861608203705, "grad_norm": 6.9308056613648485, "learning_rate": 8.407221716372615e-06, "loss": 17.6194, "step": 15511 }, { "epoch": 0.28354689527848354, "grad_norm": 8.630598151276228, "learning_rate": 8.4070050684556e-06, "loss": 18.5181, "step": 15512 }, { "epoch": 0.28356517447493007, "grad_norm": 6.140531931427257, "learning_rate": 8.406788408597324e-06, "loss": 17.1919, "step": 15513 }, { "epoch": 0.2835834536713766, "grad_norm": 6.105063839189512, "learning_rate": 8.406571736798554e-06, "loss": 17.3691, "step": 15514 }, { "epoch": 0.28360173286782314, "grad_norm": 5.6438490398010845, "learning_rate": 8.406355053060044e-06, "loss": 17.1524, "step": 15515 }, { "epoch": 0.2836200120642697, "grad_norm": 5.594531492297863, "learning_rate": 8.406138357382556e-06, "loss": 17.2135, "step": 15516 }, { "epoch": 0.28363829126071616, "grad_norm": 8.008643271750694, "learning_rate": 8.405921649766849e-06, "loss": 18.2914, "step": 15517 }, { "epoch": 0.2836565704571627, "grad_norm": 5.957476832027726, "learning_rate": 8.40570493021368e-06, "loss": 17.182, "step": 15518 }, { "epoch": 0.28367484965360923, "grad_norm": 6.777397209515814, "learning_rate": 8.405488198723813e-06, "loss": 17.5969, "step": 15519 }, { "epoch": 0.28369312885005576, "grad_norm": 6.620547159990193, "learning_rate": 8.405271455298005e-06, "loss": 17.5505, "step": 15520 }, { "epoch": 0.2837114080465023, "grad_norm": 6.648656132268679, "learning_rate": 8.405054699937014e-06, "loss": 17.4893, "step": 15521 }, { "epoch": 0.2837296872429488, "grad_norm": 5.712029974103151, "learning_rate": 8.404837932641604e-06, "loss": 17.2915, "step": 15522 }, { "epoch": 0.2837479664393953, "grad_norm": 7.236765825272202, "learning_rate": 8.404621153412532e-06, "loss": 17.6133, "step": 15523 }, { "epoch": 0.28376624563584185, "grad_norm": 8.10122431474489, "learning_rate": 8.404404362250558e-06, "loss": 18.0702, "step": 15524 }, { "epoch": 0.2837845248322884, "grad_norm": 7.144456215384271, "learning_rate": 8.404187559156443e-06, "loss": 17.7497, "step": 15525 }, { "epoch": 0.2838028040287349, "grad_norm": 6.659411430034418, "learning_rate": 8.403970744130945e-06, "loss": 17.6221, "step": 15526 }, { "epoch": 0.2838210832251814, "grad_norm": 7.175396214025024, "learning_rate": 8.403753917174825e-06, "loss": 17.8914, "step": 15527 }, { "epoch": 0.28383936242162794, "grad_norm": 6.189528774703327, "learning_rate": 8.403537078288843e-06, "loss": 17.5811, "step": 15528 }, { "epoch": 0.2838576416180745, "grad_norm": 5.856297634820472, "learning_rate": 8.403320227473759e-06, "loss": 17.1073, "step": 15529 }, { "epoch": 0.283875920814521, "grad_norm": 8.643114153220317, "learning_rate": 8.403103364730333e-06, "loss": 18.4561, "step": 15530 }, { "epoch": 0.2838942000109675, "grad_norm": 7.828290680158178, "learning_rate": 8.402886490059325e-06, "loss": 18.0083, "step": 15531 }, { "epoch": 0.283912479207414, "grad_norm": 6.445601967259433, "learning_rate": 8.402669603461495e-06, "loss": 17.2818, "step": 15532 }, { "epoch": 0.28393075840386056, "grad_norm": 8.173572547923358, "learning_rate": 8.402452704937602e-06, "loss": 18.4044, "step": 15533 }, { "epoch": 0.2839490376003071, "grad_norm": 7.3105068229246415, "learning_rate": 8.40223579448841e-06, "loss": 17.8771, "step": 15534 }, { "epoch": 0.28396731679675363, "grad_norm": 8.043554647759427, "learning_rate": 8.402018872114675e-06, "loss": 17.6716, "step": 15535 }, { "epoch": 0.2839855959932001, "grad_norm": 8.575846251880327, "learning_rate": 8.40180193781716e-06, "loss": 18.127, "step": 15536 }, { "epoch": 0.28400387518964665, "grad_norm": 8.448946370612017, "learning_rate": 8.401584991596623e-06, "loss": 18.2131, "step": 15537 }, { "epoch": 0.2840221543860932, "grad_norm": 6.12334406272895, "learning_rate": 8.401368033453827e-06, "loss": 17.3053, "step": 15538 }, { "epoch": 0.2840404335825397, "grad_norm": 6.517376042846156, "learning_rate": 8.401151063389533e-06, "loss": 17.3515, "step": 15539 }, { "epoch": 0.28405871277898626, "grad_norm": 6.126027630011614, "learning_rate": 8.400934081404497e-06, "loss": 17.2624, "step": 15540 }, { "epoch": 0.28407699197543274, "grad_norm": 6.057541862518091, "learning_rate": 8.400717087499483e-06, "loss": 17.1479, "step": 15541 }, { "epoch": 0.28409527117187927, "grad_norm": 6.5286989223360345, "learning_rate": 8.40050008167525e-06, "loss": 17.3018, "step": 15542 }, { "epoch": 0.2841135503683258, "grad_norm": 7.968956088806122, "learning_rate": 8.40028306393256e-06, "loss": 17.7986, "step": 15543 }, { "epoch": 0.28413182956477234, "grad_norm": 5.88287122620639, "learning_rate": 8.400066034272173e-06, "loss": 17.3807, "step": 15544 }, { "epoch": 0.2841501087612189, "grad_norm": 7.255389013799567, "learning_rate": 8.399848992694849e-06, "loss": 17.6915, "step": 15545 }, { "epoch": 0.28416838795766536, "grad_norm": 6.207393249947155, "learning_rate": 8.39963193920135e-06, "loss": 17.6513, "step": 15546 }, { "epoch": 0.2841866671541119, "grad_norm": 5.493874442691914, "learning_rate": 8.399414873792435e-06, "loss": 17.0561, "step": 15547 }, { "epoch": 0.28420494635055843, "grad_norm": 6.265429232395019, "learning_rate": 8.399197796468867e-06, "loss": 17.4793, "step": 15548 }, { "epoch": 0.28422322554700497, "grad_norm": 6.986659093185191, "learning_rate": 8.398980707231405e-06, "loss": 17.6765, "step": 15549 }, { "epoch": 0.2842415047434515, "grad_norm": 5.722656947055226, "learning_rate": 8.398763606080812e-06, "loss": 17.0972, "step": 15550 }, { "epoch": 0.284259783939898, "grad_norm": 6.6297669680583065, "learning_rate": 8.398546493017846e-06, "loss": 17.5315, "step": 15551 }, { "epoch": 0.2842780631363445, "grad_norm": 6.3911646778028155, "learning_rate": 8.39832936804327e-06, "loss": 17.3867, "step": 15552 }, { "epoch": 0.28429634233279105, "grad_norm": 6.234982800213382, "learning_rate": 8.398112231157844e-06, "loss": 17.3341, "step": 15553 }, { "epoch": 0.2843146215292376, "grad_norm": 7.501273893139183, "learning_rate": 8.39789508236233e-06, "loss": 18.1063, "step": 15554 }, { "epoch": 0.2843329007256841, "grad_norm": 5.731369411501362, "learning_rate": 8.397677921657488e-06, "loss": 17.0956, "step": 15555 }, { "epoch": 0.2843511799221306, "grad_norm": 5.81032010845882, "learning_rate": 8.397460749044079e-06, "loss": 17.3655, "step": 15556 }, { "epoch": 0.28436945911857714, "grad_norm": 7.082287812047839, "learning_rate": 8.397243564522867e-06, "loss": 17.5869, "step": 15557 }, { "epoch": 0.2843877383150237, "grad_norm": 6.594087648334816, "learning_rate": 8.39702636809461e-06, "loss": 17.4453, "step": 15558 }, { "epoch": 0.2844060175114702, "grad_norm": 6.80664865386882, "learning_rate": 8.39680915976007e-06, "loss": 17.7205, "step": 15559 }, { "epoch": 0.28442429670791675, "grad_norm": 7.412781021068577, "learning_rate": 8.39659193952001e-06, "loss": 17.7192, "step": 15560 }, { "epoch": 0.2844425759043632, "grad_norm": 5.867012061418011, "learning_rate": 8.39637470737519e-06, "loss": 17.2483, "step": 15561 }, { "epoch": 0.28446085510080976, "grad_norm": 9.014390204684132, "learning_rate": 8.39615746332637e-06, "loss": 18.6399, "step": 15562 }, { "epoch": 0.2844791342972563, "grad_norm": 7.759039131336317, "learning_rate": 8.395940207374314e-06, "loss": 17.5036, "step": 15563 }, { "epoch": 0.28449741349370283, "grad_norm": 6.503756645272427, "learning_rate": 8.395722939519782e-06, "loss": 17.5279, "step": 15564 }, { "epoch": 0.2845156926901493, "grad_norm": 7.171876314404471, "learning_rate": 8.395505659763534e-06, "loss": 17.8446, "step": 15565 }, { "epoch": 0.28453397188659585, "grad_norm": 6.245292427653352, "learning_rate": 8.395288368106334e-06, "loss": 17.5238, "step": 15566 }, { "epoch": 0.2845522510830424, "grad_norm": 6.8851167604665084, "learning_rate": 8.395071064548945e-06, "loss": 17.7864, "step": 15567 }, { "epoch": 0.2845705302794889, "grad_norm": 5.5415031339971925, "learning_rate": 8.394853749092125e-06, "loss": 17.0479, "step": 15568 }, { "epoch": 0.28458880947593546, "grad_norm": 5.524736013309662, "learning_rate": 8.394636421736637e-06, "loss": 17.1075, "step": 15569 }, { "epoch": 0.28460708867238194, "grad_norm": 5.053254637388772, "learning_rate": 8.394419082483242e-06, "loss": 16.9987, "step": 15570 }, { "epoch": 0.28462536786882847, "grad_norm": 6.435384113223739, "learning_rate": 8.394201731332705e-06, "loss": 17.3633, "step": 15571 }, { "epoch": 0.284643647065275, "grad_norm": 7.507276131995696, "learning_rate": 8.393984368285784e-06, "loss": 18.0902, "step": 15572 }, { "epoch": 0.28466192626172154, "grad_norm": 7.964726173875384, "learning_rate": 8.393766993343241e-06, "loss": 18.1453, "step": 15573 }, { "epoch": 0.2846802054581681, "grad_norm": 7.2337262115679914, "learning_rate": 8.393549606505842e-06, "loss": 17.8401, "step": 15574 }, { "epoch": 0.28469848465461456, "grad_norm": 5.573186046338195, "learning_rate": 8.393332207774345e-06, "loss": 17.1204, "step": 15575 }, { "epoch": 0.2847167638510611, "grad_norm": 8.1926876728909, "learning_rate": 8.393114797149513e-06, "loss": 18.4901, "step": 15576 }, { "epoch": 0.28473504304750763, "grad_norm": 6.337786905884441, "learning_rate": 8.392897374632107e-06, "loss": 17.5824, "step": 15577 }, { "epoch": 0.28475332224395417, "grad_norm": 6.646307893594947, "learning_rate": 8.392679940222893e-06, "loss": 17.3747, "step": 15578 }, { "epoch": 0.2847716014404007, "grad_norm": 5.773095729081687, "learning_rate": 8.392462493922629e-06, "loss": 17.1753, "step": 15579 }, { "epoch": 0.2847898806368472, "grad_norm": 7.033828873581546, "learning_rate": 8.392245035732077e-06, "loss": 17.9067, "step": 15580 }, { "epoch": 0.2848081598332937, "grad_norm": 6.589963825307848, "learning_rate": 8.392027565652001e-06, "loss": 17.4564, "step": 15581 }, { "epoch": 0.28482643902974025, "grad_norm": 6.496213127727764, "learning_rate": 8.391810083683163e-06, "loss": 17.5729, "step": 15582 }, { "epoch": 0.2848447182261868, "grad_norm": 6.22426655532525, "learning_rate": 8.391592589826325e-06, "loss": 17.6297, "step": 15583 }, { "epoch": 0.2848629974226333, "grad_norm": 7.330933426305583, "learning_rate": 8.391375084082249e-06, "loss": 17.9936, "step": 15584 }, { "epoch": 0.2848812766190798, "grad_norm": 5.628551605391713, "learning_rate": 8.391157566451697e-06, "loss": 16.9961, "step": 15585 }, { "epoch": 0.28489955581552634, "grad_norm": 5.046472674557297, "learning_rate": 8.390940036935433e-06, "loss": 16.9365, "step": 15586 }, { "epoch": 0.2849178350119729, "grad_norm": 6.914505875474923, "learning_rate": 8.39072249553422e-06, "loss": 17.3626, "step": 15587 }, { "epoch": 0.2849361142084194, "grad_norm": 7.25076524166895, "learning_rate": 8.390504942248817e-06, "loss": 17.9625, "step": 15588 }, { "epoch": 0.28495439340486595, "grad_norm": 7.699190980540354, "learning_rate": 8.390287377079989e-06, "loss": 17.5692, "step": 15589 }, { "epoch": 0.2849726726013124, "grad_norm": 6.518746589959383, "learning_rate": 8.390069800028497e-06, "loss": 17.3474, "step": 15590 }, { "epoch": 0.28499095179775896, "grad_norm": 6.774196109811676, "learning_rate": 8.389852211095104e-06, "loss": 17.5621, "step": 15591 }, { "epoch": 0.2850092309942055, "grad_norm": 7.007953092574309, "learning_rate": 8.389634610280576e-06, "loss": 17.7761, "step": 15592 }, { "epoch": 0.28502751019065203, "grad_norm": 7.785073531272435, "learning_rate": 8.38941699758567e-06, "loss": 18.2594, "step": 15593 }, { "epoch": 0.28504578938709857, "grad_norm": 5.737027901527635, "learning_rate": 8.389199373011151e-06, "loss": 16.9646, "step": 15594 }, { "epoch": 0.28506406858354505, "grad_norm": 6.1317094096416005, "learning_rate": 8.388981736557786e-06, "loss": 17.276, "step": 15595 }, { "epoch": 0.2850823477799916, "grad_norm": 5.552608549638589, "learning_rate": 8.388764088226332e-06, "loss": 17.1453, "step": 15596 }, { "epoch": 0.2851006269764381, "grad_norm": 6.541323176059032, "learning_rate": 8.388546428017553e-06, "loss": 17.572, "step": 15597 }, { "epoch": 0.28511890617288466, "grad_norm": 6.5776082966284495, "learning_rate": 8.388328755932213e-06, "loss": 17.3414, "step": 15598 }, { "epoch": 0.28513718536933114, "grad_norm": 5.859995221506433, "learning_rate": 8.388111071971077e-06, "loss": 17.3288, "step": 15599 }, { "epoch": 0.28515546456577767, "grad_norm": 6.283259630316175, "learning_rate": 8.387893376134903e-06, "loss": 17.615, "step": 15600 }, { "epoch": 0.2851737437622242, "grad_norm": 8.207639137259413, "learning_rate": 8.387675668424457e-06, "loss": 18.3846, "step": 15601 }, { "epoch": 0.28519202295867074, "grad_norm": 6.3159778451564, "learning_rate": 8.387457948840503e-06, "loss": 17.7309, "step": 15602 }, { "epoch": 0.2852103021551173, "grad_norm": 7.207083762354696, "learning_rate": 8.387240217383804e-06, "loss": 17.754, "step": 15603 }, { "epoch": 0.28522858135156376, "grad_norm": 6.95631087327876, "learning_rate": 8.38702247405512e-06, "loss": 17.6824, "step": 15604 }, { "epoch": 0.2852468605480103, "grad_norm": 7.055379196665715, "learning_rate": 8.386804718855217e-06, "loss": 17.7365, "step": 15605 }, { "epoch": 0.28526513974445683, "grad_norm": 6.141082121277028, "learning_rate": 8.386586951784857e-06, "loss": 17.2589, "step": 15606 }, { "epoch": 0.28528341894090337, "grad_norm": 6.2588554003648555, "learning_rate": 8.386369172844803e-06, "loss": 17.4175, "step": 15607 }, { "epoch": 0.2853016981373499, "grad_norm": 5.849630957286629, "learning_rate": 8.386151382035819e-06, "loss": 17.2994, "step": 15608 }, { "epoch": 0.2853199773337964, "grad_norm": 7.919239102329731, "learning_rate": 8.38593357935867e-06, "loss": 18.2311, "step": 15609 }, { "epoch": 0.2853382565302429, "grad_norm": 7.460346656682483, "learning_rate": 8.385715764814115e-06, "loss": 18.0355, "step": 15610 }, { "epoch": 0.28535653572668945, "grad_norm": 7.166477420381712, "learning_rate": 8.385497938402921e-06, "loss": 17.9849, "step": 15611 }, { "epoch": 0.285374814923136, "grad_norm": 7.811072381805779, "learning_rate": 8.385280100125852e-06, "loss": 17.874, "step": 15612 }, { "epoch": 0.2853930941195825, "grad_norm": 6.212841050772617, "learning_rate": 8.385062249983668e-06, "loss": 17.7724, "step": 15613 }, { "epoch": 0.285411373316029, "grad_norm": 8.286750865831028, "learning_rate": 8.384844387977136e-06, "loss": 17.7, "step": 15614 }, { "epoch": 0.28542965251247554, "grad_norm": 7.108771609697807, "learning_rate": 8.384626514107017e-06, "loss": 17.7998, "step": 15615 }, { "epoch": 0.2854479317089221, "grad_norm": 5.327994253042193, "learning_rate": 8.384408628374076e-06, "loss": 17.1421, "step": 15616 }, { "epoch": 0.2854662109053686, "grad_norm": 6.386484428558483, "learning_rate": 8.384190730779077e-06, "loss": 17.5845, "step": 15617 }, { "epoch": 0.28548449010181515, "grad_norm": 5.628792152596963, "learning_rate": 8.383972821322783e-06, "loss": 17.4626, "step": 15618 }, { "epoch": 0.2855027692982616, "grad_norm": 6.1920812252869855, "learning_rate": 8.383754900005958e-06, "loss": 17.5209, "step": 15619 }, { "epoch": 0.28552104849470816, "grad_norm": 7.196441400429767, "learning_rate": 8.383536966829365e-06, "loss": 17.8727, "step": 15620 }, { "epoch": 0.2855393276911547, "grad_norm": 6.394134189644871, "learning_rate": 8.38331902179377e-06, "loss": 17.3357, "step": 15621 }, { "epoch": 0.28555760688760123, "grad_norm": 7.351549259202254, "learning_rate": 8.383101064899934e-06, "loss": 18.0577, "step": 15622 }, { "epoch": 0.28557588608404777, "grad_norm": 8.885390520908818, "learning_rate": 8.382883096148623e-06, "loss": 17.5862, "step": 15623 }, { "epoch": 0.28559416528049425, "grad_norm": 7.79950581234333, "learning_rate": 8.382665115540601e-06, "loss": 18.0429, "step": 15624 }, { "epoch": 0.2856124444769408, "grad_norm": 7.142911245825157, "learning_rate": 8.38244712307663e-06, "loss": 17.7714, "step": 15625 }, { "epoch": 0.2856307236733873, "grad_norm": 7.328069278498716, "learning_rate": 8.382229118757475e-06, "loss": 17.7837, "step": 15626 }, { "epoch": 0.28564900286983386, "grad_norm": 6.390514179583239, "learning_rate": 8.382011102583903e-06, "loss": 17.5888, "step": 15627 }, { "epoch": 0.2856672820662804, "grad_norm": 6.003095180740606, "learning_rate": 8.381793074556673e-06, "loss": 17.6111, "step": 15628 }, { "epoch": 0.2856855612627269, "grad_norm": 7.090927574776756, "learning_rate": 8.38157503467655e-06, "loss": 17.7163, "step": 15629 }, { "epoch": 0.2857038404591734, "grad_norm": 10.159727596808448, "learning_rate": 8.381356982944304e-06, "loss": 18.8136, "step": 15630 }, { "epoch": 0.28572211965561994, "grad_norm": 6.746606551618541, "learning_rate": 8.381138919360693e-06, "loss": 17.4561, "step": 15631 }, { "epoch": 0.2857403988520665, "grad_norm": 8.895814824886411, "learning_rate": 8.380920843926485e-06, "loss": 18.0273, "step": 15632 }, { "epoch": 0.285758678048513, "grad_norm": 7.931643137647633, "learning_rate": 8.380702756642443e-06, "loss": 18.2132, "step": 15633 }, { "epoch": 0.2857769572449595, "grad_norm": 6.607209696763906, "learning_rate": 8.380484657509329e-06, "loss": 17.5462, "step": 15634 }, { "epoch": 0.28579523644140603, "grad_norm": 6.397507164123776, "learning_rate": 8.380266546527911e-06, "loss": 17.6273, "step": 15635 }, { "epoch": 0.28581351563785257, "grad_norm": 6.787735261967875, "learning_rate": 8.380048423698952e-06, "loss": 17.4471, "step": 15636 }, { "epoch": 0.2858317948342991, "grad_norm": 8.94183718488205, "learning_rate": 8.379830289023216e-06, "loss": 18.6105, "step": 15637 }, { "epoch": 0.2858500740307456, "grad_norm": 6.901059258190413, "learning_rate": 8.379612142501468e-06, "loss": 17.6378, "step": 15638 }, { "epoch": 0.2858683532271921, "grad_norm": 7.276057520784042, "learning_rate": 8.379393984134473e-06, "loss": 17.645, "step": 15639 }, { "epoch": 0.28588663242363865, "grad_norm": 6.7987410673042605, "learning_rate": 8.379175813922998e-06, "loss": 17.6281, "step": 15640 }, { "epoch": 0.2859049116200852, "grad_norm": 5.242972500649996, "learning_rate": 8.378957631867801e-06, "loss": 17.0635, "step": 15641 }, { "epoch": 0.2859231908165317, "grad_norm": 6.686899850055457, "learning_rate": 8.378739437969653e-06, "loss": 17.3893, "step": 15642 }, { "epoch": 0.2859414700129782, "grad_norm": 6.254068526072431, "learning_rate": 8.378521232229316e-06, "loss": 17.3998, "step": 15643 }, { "epoch": 0.28595974920942474, "grad_norm": 6.1499387344539915, "learning_rate": 8.378303014647555e-06, "loss": 17.3369, "step": 15644 }, { "epoch": 0.2859780284058713, "grad_norm": 7.888230663721426, "learning_rate": 8.378084785225134e-06, "loss": 18.1959, "step": 15645 }, { "epoch": 0.2859963076023178, "grad_norm": 6.289409435598981, "learning_rate": 8.37786654396282e-06, "loss": 17.5666, "step": 15646 }, { "epoch": 0.28601458679876435, "grad_norm": 7.090039170347586, "learning_rate": 8.377648290861377e-06, "loss": 17.9468, "step": 15647 }, { "epoch": 0.2860328659952108, "grad_norm": 6.036925531325897, "learning_rate": 8.37743002592157e-06, "loss": 17.1986, "step": 15648 }, { "epoch": 0.28605114519165736, "grad_norm": 6.868252158953648, "learning_rate": 8.377211749144165e-06, "loss": 17.752, "step": 15649 }, { "epoch": 0.2860694243881039, "grad_norm": 9.033682671133217, "learning_rate": 8.376993460529925e-06, "loss": 18.5495, "step": 15650 }, { "epoch": 0.28608770358455043, "grad_norm": 5.882631257234656, "learning_rate": 8.376775160079614e-06, "loss": 17.2984, "step": 15651 }, { "epoch": 0.28610598278099697, "grad_norm": 7.8151648561632845, "learning_rate": 8.376556847794001e-06, "loss": 18.2152, "step": 15652 }, { "epoch": 0.28612426197744345, "grad_norm": 5.1476495119161, "learning_rate": 8.376338523673848e-06, "loss": 16.9924, "step": 15653 }, { "epoch": 0.28614254117389, "grad_norm": 5.977907295560187, "learning_rate": 8.376120187719924e-06, "loss": 17.4418, "step": 15654 }, { "epoch": 0.2861608203703365, "grad_norm": 8.701996806963585, "learning_rate": 8.37590183993299e-06, "loss": 18.1837, "step": 15655 }, { "epoch": 0.28617909956678306, "grad_norm": 6.594309340151606, "learning_rate": 8.375683480313812e-06, "loss": 17.8694, "step": 15656 }, { "epoch": 0.2861973787632296, "grad_norm": 7.245940354928539, "learning_rate": 8.375465108863159e-06, "loss": 17.9216, "step": 15657 }, { "epoch": 0.2862156579596761, "grad_norm": 7.323500051559651, "learning_rate": 8.375246725581792e-06, "loss": 18.2038, "step": 15658 }, { "epoch": 0.2862339371561226, "grad_norm": 7.425768035062593, "learning_rate": 8.375028330470477e-06, "loss": 17.7077, "step": 15659 }, { "epoch": 0.28625221635256914, "grad_norm": 6.646181546882208, "learning_rate": 8.374809923529981e-06, "loss": 17.5014, "step": 15660 }, { "epoch": 0.2862704955490157, "grad_norm": 7.815951148838298, "learning_rate": 8.374591504761072e-06, "loss": 18.3835, "step": 15661 }, { "epoch": 0.2862887747454622, "grad_norm": 9.259074008802063, "learning_rate": 8.37437307416451e-06, "loss": 17.9353, "step": 15662 }, { "epoch": 0.2863070539419087, "grad_norm": 5.373786956405164, "learning_rate": 8.374154631741063e-06, "loss": 17.1724, "step": 15663 }, { "epoch": 0.28632533313835523, "grad_norm": 6.369099122450056, "learning_rate": 8.373936177491497e-06, "loss": 17.517, "step": 15664 }, { "epoch": 0.28634361233480177, "grad_norm": 6.033493817967538, "learning_rate": 8.373717711416578e-06, "loss": 17.278, "step": 15665 }, { "epoch": 0.2863618915312483, "grad_norm": 6.127814021510294, "learning_rate": 8.373499233517071e-06, "loss": 17.382, "step": 15666 }, { "epoch": 0.28638017072769484, "grad_norm": 5.894522159937451, "learning_rate": 8.373280743793741e-06, "loss": 17.2759, "step": 15667 }, { "epoch": 0.2863984499241413, "grad_norm": 12.338123559113056, "learning_rate": 8.373062242247358e-06, "loss": 17.7412, "step": 15668 }, { "epoch": 0.28641672912058785, "grad_norm": 7.369914316078894, "learning_rate": 8.372843728878681e-06, "loss": 17.9116, "step": 15669 }, { "epoch": 0.2864350083170344, "grad_norm": 6.947360513007213, "learning_rate": 8.37262520368848e-06, "loss": 17.5352, "step": 15670 }, { "epoch": 0.2864532875134809, "grad_norm": 6.656061287890204, "learning_rate": 8.372406666677521e-06, "loss": 17.6119, "step": 15671 }, { "epoch": 0.2864715667099274, "grad_norm": 5.5692997538235725, "learning_rate": 8.37218811784657e-06, "loss": 16.9218, "step": 15672 }, { "epoch": 0.28648984590637394, "grad_norm": 6.848217536842453, "learning_rate": 8.371969557196391e-06, "loss": 17.6411, "step": 15673 }, { "epoch": 0.2865081251028205, "grad_norm": 6.129048388804936, "learning_rate": 8.371750984727753e-06, "loss": 17.4972, "step": 15674 }, { "epoch": 0.286526404299267, "grad_norm": 7.013742207397539, "learning_rate": 8.37153240044142e-06, "loss": 17.6785, "step": 15675 }, { "epoch": 0.28654468349571355, "grad_norm": 7.179054398456335, "learning_rate": 8.371313804338156e-06, "loss": 17.6121, "step": 15676 }, { "epoch": 0.28656296269216003, "grad_norm": 7.800869695566196, "learning_rate": 8.371095196418731e-06, "loss": 18.1214, "step": 15677 }, { "epoch": 0.28658124188860656, "grad_norm": 6.261844071848223, "learning_rate": 8.370876576683913e-06, "loss": 17.2797, "step": 15678 }, { "epoch": 0.2865995210850531, "grad_norm": 6.810676162116741, "learning_rate": 8.37065794513446e-06, "loss": 17.4442, "step": 15679 }, { "epoch": 0.28661780028149964, "grad_norm": 7.497837283605271, "learning_rate": 8.370439301771146e-06, "loss": 17.9312, "step": 15680 }, { "epoch": 0.28663607947794617, "grad_norm": 6.724830039731928, "learning_rate": 8.370220646594736e-06, "loss": 17.5545, "step": 15681 }, { "epoch": 0.28665435867439265, "grad_norm": 6.520261805262163, "learning_rate": 8.370001979605993e-06, "loss": 17.6657, "step": 15682 }, { "epoch": 0.2866726378708392, "grad_norm": 7.030807258369695, "learning_rate": 8.369783300805685e-06, "loss": 17.7105, "step": 15683 }, { "epoch": 0.2866909170672857, "grad_norm": 9.927787256458823, "learning_rate": 8.36956461019458e-06, "loss": 17.9591, "step": 15684 }, { "epoch": 0.28670919626373226, "grad_norm": 6.033712258318712, "learning_rate": 8.369345907773444e-06, "loss": 17.3323, "step": 15685 }, { "epoch": 0.2867274754601788, "grad_norm": 6.195990386468086, "learning_rate": 8.369127193543044e-06, "loss": 17.4414, "step": 15686 }, { "epoch": 0.2867457546566253, "grad_norm": 6.896687238686422, "learning_rate": 8.368908467504142e-06, "loss": 17.5145, "step": 15687 }, { "epoch": 0.2867640338530718, "grad_norm": 6.95707880779647, "learning_rate": 8.368689729657511e-06, "loss": 17.7917, "step": 15688 }, { "epoch": 0.28678231304951834, "grad_norm": 7.336968328842924, "learning_rate": 8.368470980003914e-06, "loss": 17.6072, "step": 15689 }, { "epoch": 0.2868005922459649, "grad_norm": 6.1451909369189215, "learning_rate": 8.368252218544117e-06, "loss": 17.4768, "step": 15690 }, { "epoch": 0.2868188714424114, "grad_norm": 6.061638916616824, "learning_rate": 8.368033445278892e-06, "loss": 17.5442, "step": 15691 }, { "epoch": 0.2868371506388579, "grad_norm": 6.548939791459066, "learning_rate": 8.367814660208999e-06, "loss": 17.635, "step": 15692 }, { "epoch": 0.28685542983530443, "grad_norm": 7.527961122980897, "learning_rate": 8.367595863335208e-06, "loss": 17.7922, "step": 15693 }, { "epoch": 0.28687370903175097, "grad_norm": 6.5655215870551045, "learning_rate": 8.367377054658287e-06, "loss": 17.3734, "step": 15694 }, { "epoch": 0.2868919882281975, "grad_norm": 6.477341736143212, "learning_rate": 8.367158234179001e-06, "loss": 17.4314, "step": 15695 }, { "epoch": 0.28691026742464404, "grad_norm": 7.475140751791854, "learning_rate": 8.366939401898117e-06, "loss": 17.6992, "step": 15696 }, { "epoch": 0.2869285466210905, "grad_norm": 7.305250930061159, "learning_rate": 8.366720557816404e-06, "loss": 17.6783, "step": 15697 }, { "epoch": 0.28694682581753705, "grad_norm": 7.440542250326834, "learning_rate": 8.366501701934626e-06, "loss": 17.774, "step": 15698 }, { "epoch": 0.2869651050139836, "grad_norm": 7.427751760169241, "learning_rate": 8.366282834253553e-06, "loss": 17.9105, "step": 15699 }, { "epoch": 0.2869833842104301, "grad_norm": 7.019817900435624, "learning_rate": 8.366063954773949e-06, "loss": 17.6111, "step": 15700 }, { "epoch": 0.28700166340687666, "grad_norm": 5.732647211925649, "learning_rate": 8.365845063496585e-06, "loss": 17.1256, "step": 15701 }, { "epoch": 0.28701994260332314, "grad_norm": 7.037915434264202, "learning_rate": 8.365626160422226e-06, "loss": 17.7761, "step": 15702 }, { "epoch": 0.2870382217997697, "grad_norm": 5.67736410354966, "learning_rate": 8.365407245551638e-06, "loss": 17.062, "step": 15703 }, { "epoch": 0.2870565009962162, "grad_norm": 5.468890560092592, "learning_rate": 8.36518831888559e-06, "loss": 17.0763, "step": 15704 }, { "epoch": 0.28707478019266275, "grad_norm": 5.434551600521866, "learning_rate": 8.364969380424849e-06, "loss": 16.9165, "step": 15705 }, { "epoch": 0.28709305938910923, "grad_norm": 6.323974577584368, "learning_rate": 8.364750430170183e-06, "loss": 17.3111, "step": 15706 }, { "epoch": 0.28711133858555576, "grad_norm": 7.9254576665947365, "learning_rate": 8.36453146812236e-06, "loss": 18.2983, "step": 15707 }, { "epoch": 0.2871296177820023, "grad_norm": 7.312198334280671, "learning_rate": 8.364312494282143e-06, "loss": 17.672, "step": 15708 }, { "epoch": 0.28714789697844884, "grad_norm": 6.6101487821054565, "learning_rate": 8.364093508650304e-06, "loss": 17.6043, "step": 15709 }, { "epoch": 0.28716617617489537, "grad_norm": 6.559681846630912, "learning_rate": 8.36387451122761e-06, "loss": 17.525, "step": 15710 }, { "epoch": 0.28718445537134185, "grad_norm": 7.656501045636494, "learning_rate": 8.363655502014826e-06, "loss": 18.0472, "step": 15711 }, { "epoch": 0.2872027345677884, "grad_norm": 9.269767916265716, "learning_rate": 8.363436481012722e-06, "loss": 17.7054, "step": 15712 }, { "epoch": 0.2872210137642349, "grad_norm": 6.74409848397112, "learning_rate": 8.363217448222065e-06, "loss": 17.3151, "step": 15713 }, { "epoch": 0.28723929296068146, "grad_norm": 8.016435392112882, "learning_rate": 8.362998403643623e-06, "loss": 18.1044, "step": 15714 }, { "epoch": 0.287257572157128, "grad_norm": 6.570574622683178, "learning_rate": 8.362779347278163e-06, "loss": 17.3039, "step": 15715 }, { "epoch": 0.2872758513535745, "grad_norm": 7.366678450115921, "learning_rate": 8.362560279126454e-06, "loss": 17.9155, "step": 15716 }, { "epoch": 0.287294130550021, "grad_norm": 7.345328132863208, "learning_rate": 8.362341199189264e-06, "loss": 18.1081, "step": 15717 }, { "epoch": 0.28731240974646755, "grad_norm": 6.579812525042959, "learning_rate": 8.362122107467357e-06, "loss": 17.4539, "step": 15718 }, { "epoch": 0.2873306889429141, "grad_norm": 6.677578022932323, "learning_rate": 8.361903003961507e-06, "loss": 17.6364, "step": 15719 }, { "epoch": 0.2873489681393606, "grad_norm": 7.310175659897703, "learning_rate": 8.361683888672475e-06, "loss": 17.9238, "step": 15720 }, { "epoch": 0.2873672473358071, "grad_norm": 7.462718364959586, "learning_rate": 8.361464761601036e-06, "loss": 17.6278, "step": 15721 }, { "epoch": 0.28738552653225363, "grad_norm": 7.227925124793609, "learning_rate": 8.361245622747954e-06, "loss": 17.6803, "step": 15722 }, { "epoch": 0.28740380572870017, "grad_norm": 5.185760579323815, "learning_rate": 8.361026472113997e-06, "loss": 16.9618, "step": 15723 }, { "epoch": 0.2874220849251467, "grad_norm": 6.827928535280947, "learning_rate": 8.360807309699934e-06, "loss": 17.7473, "step": 15724 }, { "epoch": 0.28744036412159324, "grad_norm": 7.476993260591754, "learning_rate": 8.360588135506532e-06, "loss": 17.9636, "step": 15725 }, { "epoch": 0.2874586433180397, "grad_norm": 6.337144269226875, "learning_rate": 8.360368949534562e-06, "loss": 17.6345, "step": 15726 }, { "epoch": 0.28747692251448626, "grad_norm": 6.127933451755372, "learning_rate": 8.360149751784789e-06, "loss": 17.37, "step": 15727 }, { "epoch": 0.2874952017109328, "grad_norm": 6.0698608722058935, "learning_rate": 8.359930542257984e-06, "loss": 17.1567, "step": 15728 }, { "epoch": 0.2875134809073793, "grad_norm": 5.84965151610325, "learning_rate": 8.359711320954913e-06, "loss": 17.3871, "step": 15729 }, { "epoch": 0.28753176010382586, "grad_norm": 7.0989870416467875, "learning_rate": 8.359492087876346e-06, "loss": 17.6362, "step": 15730 }, { "epoch": 0.28755003930027234, "grad_norm": 7.949534533597411, "learning_rate": 8.359272843023049e-06, "loss": 18.5204, "step": 15731 }, { "epoch": 0.2875683184967189, "grad_norm": 8.35695095818438, "learning_rate": 8.359053586395796e-06, "loss": 18.2508, "step": 15732 }, { "epoch": 0.2875865976931654, "grad_norm": 5.440921624763122, "learning_rate": 8.358834317995349e-06, "loss": 17.1725, "step": 15733 }, { "epoch": 0.28760487688961195, "grad_norm": 7.033113579652731, "learning_rate": 8.35861503782248e-06, "loss": 17.7829, "step": 15734 }, { "epoch": 0.2876231560860585, "grad_norm": 6.064502051210512, "learning_rate": 8.358395745877956e-06, "loss": 17.2337, "step": 15735 }, { "epoch": 0.28764143528250496, "grad_norm": 6.502516987498833, "learning_rate": 8.358176442162545e-06, "loss": 17.3482, "step": 15736 }, { "epoch": 0.2876597144789515, "grad_norm": 7.3532004121531, "learning_rate": 8.35795712667702e-06, "loss": 18.0178, "step": 15737 }, { "epoch": 0.28767799367539804, "grad_norm": 7.212506742678808, "learning_rate": 8.357737799422144e-06, "loss": 17.9006, "step": 15738 }, { "epoch": 0.28769627287184457, "grad_norm": 5.851440011004375, "learning_rate": 8.35751846039869e-06, "loss": 17.3012, "step": 15739 }, { "epoch": 0.28771455206829105, "grad_norm": 7.668508686807459, "learning_rate": 8.357299109607425e-06, "loss": 18.4711, "step": 15740 }, { "epoch": 0.2877328312647376, "grad_norm": 8.088217389170984, "learning_rate": 8.357079747049116e-06, "loss": 18.3095, "step": 15741 }, { "epoch": 0.2877511104611841, "grad_norm": 6.679334674361961, "learning_rate": 8.356860372724538e-06, "loss": 17.736, "step": 15742 }, { "epoch": 0.28776938965763066, "grad_norm": 6.996640799362128, "learning_rate": 8.356640986634453e-06, "loss": 17.7252, "step": 15743 }, { "epoch": 0.2877876688540772, "grad_norm": 6.84840280288548, "learning_rate": 8.356421588779633e-06, "loss": 17.6636, "step": 15744 }, { "epoch": 0.2878059480505237, "grad_norm": 6.754510511269283, "learning_rate": 8.356202179160847e-06, "loss": 17.4008, "step": 15745 }, { "epoch": 0.2878242272469702, "grad_norm": 5.698967045748068, "learning_rate": 8.355982757778861e-06, "loss": 17.262, "step": 15746 }, { "epoch": 0.28784250644341675, "grad_norm": 6.573891907933512, "learning_rate": 8.35576332463445e-06, "loss": 17.4271, "step": 15747 }, { "epoch": 0.2878607856398633, "grad_norm": 6.243566704387631, "learning_rate": 8.355543879728378e-06, "loss": 17.4468, "step": 15748 }, { "epoch": 0.2878790648363098, "grad_norm": 6.31556660161982, "learning_rate": 8.355324423061415e-06, "loss": 17.5753, "step": 15749 }, { "epoch": 0.2878973440327563, "grad_norm": 5.770455445087101, "learning_rate": 8.355104954634334e-06, "loss": 17.0861, "step": 15750 }, { "epoch": 0.28791562322920283, "grad_norm": 7.50727004029445, "learning_rate": 8.3548854744479e-06, "loss": 18.0645, "step": 15751 }, { "epoch": 0.28793390242564937, "grad_norm": 8.677714095973903, "learning_rate": 8.354665982502883e-06, "loss": 18.379, "step": 15752 }, { "epoch": 0.2879521816220959, "grad_norm": 4.7307551149284945, "learning_rate": 8.354446478800053e-06, "loss": 16.738, "step": 15753 }, { "epoch": 0.28797046081854244, "grad_norm": 6.695736112471265, "learning_rate": 8.35422696334018e-06, "loss": 17.8252, "step": 15754 }, { "epoch": 0.2879887400149889, "grad_norm": 6.32623048304772, "learning_rate": 8.354007436124031e-06, "loss": 17.4677, "step": 15755 }, { "epoch": 0.28800701921143546, "grad_norm": 5.641460203140147, "learning_rate": 8.353787897152377e-06, "loss": 17.2396, "step": 15756 }, { "epoch": 0.288025298407882, "grad_norm": 5.224213157568367, "learning_rate": 8.353568346425989e-06, "loss": 17.1579, "step": 15757 }, { "epoch": 0.2880435776043285, "grad_norm": 6.951452327045251, "learning_rate": 8.353348783945633e-06, "loss": 17.7508, "step": 15758 }, { "epoch": 0.28806185680077506, "grad_norm": 6.943798294859284, "learning_rate": 8.353129209712084e-06, "loss": 17.5634, "step": 15759 }, { "epoch": 0.28808013599722154, "grad_norm": 8.345537359122252, "learning_rate": 8.352909623726105e-06, "loss": 18.1797, "step": 15760 }, { "epoch": 0.2880984151936681, "grad_norm": 5.59276660194879, "learning_rate": 8.352690025988468e-06, "loss": 17.2527, "step": 15761 }, { "epoch": 0.2881166943901146, "grad_norm": 6.721423592195531, "learning_rate": 8.352470416499945e-06, "loss": 17.5115, "step": 15762 }, { "epoch": 0.28813497358656115, "grad_norm": 6.34198069378264, "learning_rate": 8.352250795261304e-06, "loss": 17.4371, "step": 15763 }, { "epoch": 0.2881532527830077, "grad_norm": 6.625762213620717, "learning_rate": 8.352031162273316e-06, "loss": 17.8069, "step": 15764 }, { "epoch": 0.28817153197945417, "grad_norm": 7.369267609233369, "learning_rate": 8.351811517536748e-06, "loss": 17.8182, "step": 15765 }, { "epoch": 0.2881898111759007, "grad_norm": 6.127724232807258, "learning_rate": 8.351591861052371e-06, "loss": 17.4318, "step": 15766 }, { "epoch": 0.28820809037234724, "grad_norm": 5.957781267863218, "learning_rate": 8.351372192820956e-06, "loss": 17.2612, "step": 15767 }, { "epoch": 0.28822636956879377, "grad_norm": 7.40018453570937, "learning_rate": 8.351152512843273e-06, "loss": 18.0546, "step": 15768 }, { "epoch": 0.2882446487652403, "grad_norm": 7.588501590064388, "learning_rate": 8.350932821120093e-06, "loss": 17.5848, "step": 15769 }, { "epoch": 0.2882629279616868, "grad_norm": 6.9466494681157505, "learning_rate": 8.35071311765218e-06, "loss": 17.8109, "step": 15770 }, { "epoch": 0.2882812071581333, "grad_norm": 8.034972029273105, "learning_rate": 8.350493402440312e-06, "loss": 18.2194, "step": 15771 }, { "epoch": 0.28829948635457986, "grad_norm": 6.376879871939102, "learning_rate": 8.350273675485251e-06, "loss": 17.489, "step": 15772 }, { "epoch": 0.2883177655510264, "grad_norm": 6.655720185303255, "learning_rate": 8.350053936787777e-06, "loss": 17.5294, "step": 15773 }, { "epoch": 0.2883360447474729, "grad_norm": 6.714952995024809, "learning_rate": 8.349834186348652e-06, "loss": 17.5506, "step": 15774 }, { "epoch": 0.2883543239439194, "grad_norm": 6.856311084096559, "learning_rate": 8.349614424168649e-06, "loss": 17.8293, "step": 15775 }, { "epoch": 0.28837260314036595, "grad_norm": 5.681622234775624, "learning_rate": 8.349394650248537e-06, "loss": 17.2425, "step": 15776 }, { "epoch": 0.2883908823368125, "grad_norm": 5.712181956332461, "learning_rate": 8.349174864589088e-06, "loss": 17.1507, "step": 15777 }, { "epoch": 0.288409161533259, "grad_norm": 5.832129861901588, "learning_rate": 8.348955067191071e-06, "loss": 17.1996, "step": 15778 }, { "epoch": 0.2884274407297055, "grad_norm": 7.35506781927844, "learning_rate": 8.348735258055258e-06, "loss": 17.5264, "step": 15779 }, { "epoch": 0.28844571992615203, "grad_norm": 8.321872434461017, "learning_rate": 8.34851543718242e-06, "loss": 17.928, "step": 15780 }, { "epoch": 0.28846399912259857, "grad_norm": 7.331634591493145, "learning_rate": 8.348295604573324e-06, "loss": 17.6809, "step": 15781 }, { "epoch": 0.2884822783190451, "grad_norm": 6.64089174685003, "learning_rate": 8.348075760228744e-06, "loss": 17.5471, "step": 15782 }, { "epoch": 0.28850055751549164, "grad_norm": 6.379516298623369, "learning_rate": 8.347855904149447e-06, "loss": 17.4211, "step": 15783 }, { "epoch": 0.2885188367119381, "grad_norm": 5.813734280984642, "learning_rate": 8.347636036336207e-06, "loss": 17.1827, "step": 15784 }, { "epoch": 0.28853711590838466, "grad_norm": 7.679506660534976, "learning_rate": 8.347416156789791e-06, "loss": 18.2738, "step": 15785 }, { "epoch": 0.2885553951048312, "grad_norm": 6.070713105725202, "learning_rate": 8.347196265510976e-06, "loss": 17.6549, "step": 15786 }, { "epoch": 0.2885736743012777, "grad_norm": 8.341650233524906, "learning_rate": 8.346976362500526e-06, "loss": 17.9239, "step": 15787 }, { "epoch": 0.28859195349772426, "grad_norm": 6.223210640444305, "learning_rate": 8.346756447759215e-06, "loss": 17.5592, "step": 15788 }, { "epoch": 0.28861023269417074, "grad_norm": 7.440064451023787, "learning_rate": 8.346536521287812e-06, "loss": 18.4392, "step": 15789 }, { "epoch": 0.2886285118906173, "grad_norm": 6.827000463227836, "learning_rate": 8.346316583087088e-06, "loss": 17.6463, "step": 15790 }, { "epoch": 0.2886467910870638, "grad_norm": 7.2689956750364555, "learning_rate": 8.346096633157816e-06, "loss": 17.6978, "step": 15791 }, { "epoch": 0.28866507028351035, "grad_norm": 9.560304213282526, "learning_rate": 8.345876671500766e-06, "loss": 18.0112, "step": 15792 }, { "epoch": 0.2886833494799569, "grad_norm": 6.371597099476278, "learning_rate": 8.345656698116708e-06, "loss": 17.3692, "step": 15793 }, { "epoch": 0.28870162867640337, "grad_norm": 5.686262289011201, "learning_rate": 8.345436713006416e-06, "loss": 16.9762, "step": 15794 }, { "epoch": 0.2887199078728499, "grad_norm": 7.621295718506177, "learning_rate": 8.345216716170656e-06, "loss": 17.5674, "step": 15795 }, { "epoch": 0.28873818706929644, "grad_norm": 7.620971429766454, "learning_rate": 8.344996707610202e-06, "loss": 17.7524, "step": 15796 }, { "epoch": 0.288756466265743, "grad_norm": 6.191796140829016, "learning_rate": 8.344776687325825e-06, "loss": 17.313, "step": 15797 }, { "epoch": 0.2887747454621895, "grad_norm": 6.181568030098211, "learning_rate": 8.344556655318296e-06, "loss": 17.5748, "step": 15798 }, { "epoch": 0.288793024658636, "grad_norm": 6.268887034304854, "learning_rate": 8.344336611588385e-06, "loss": 17.3379, "step": 15799 }, { "epoch": 0.2888113038550825, "grad_norm": 6.341147994667579, "learning_rate": 8.344116556136867e-06, "loss": 17.4737, "step": 15800 }, { "epoch": 0.28882958305152906, "grad_norm": 6.033665314323493, "learning_rate": 8.34389648896451e-06, "loss": 17.6287, "step": 15801 }, { "epoch": 0.2888478622479756, "grad_norm": 6.986899797204273, "learning_rate": 8.343676410072086e-06, "loss": 17.2298, "step": 15802 }, { "epoch": 0.28886614144442213, "grad_norm": 8.022259512284283, "learning_rate": 8.343456319460365e-06, "loss": 18.4381, "step": 15803 }, { "epoch": 0.2888844206408686, "grad_norm": 7.335846744323855, "learning_rate": 8.34323621713012e-06, "loss": 18.1217, "step": 15804 }, { "epoch": 0.28890269983731515, "grad_norm": 5.4181572914117195, "learning_rate": 8.343016103082122e-06, "loss": 17.2419, "step": 15805 }, { "epoch": 0.2889209790337617, "grad_norm": 7.668717511255242, "learning_rate": 8.342795977317144e-06, "loss": 18.0679, "step": 15806 }, { "epoch": 0.2889392582302082, "grad_norm": 6.007362244453671, "learning_rate": 8.342575839835954e-06, "loss": 17.324, "step": 15807 }, { "epoch": 0.2889575374266547, "grad_norm": 5.766159683698141, "learning_rate": 8.342355690639329e-06, "loss": 17.1475, "step": 15808 }, { "epoch": 0.28897581662310123, "grad_norm": 7.18569407857634, "learning_rate": 8.342135529728036e-06, "loss": 18.1369, "step": 15809 }, { "epoch": 0.28899409581954777, "grad_norm": 8.230697519035877, "learning_rate": 8.341915357102846e-06, "loss": 18.0357, "step": 15810 }, { "epoch": 0.2890123750159943, "grad_norm": 6.858098229288025, "learning_rate": 8.341695172764533e-06, "loss": 17.6922, "step": 15811 }, { "epoch": 0.28903065421244084, "grad_norm": 6.996834134548224, "learning_rate": 8.34147497671387e-06, "loss": 17.7166, "step": 15812 }, { "epoch": 0.2890489334088873, "grad_norm": 7.044271065411338, "learning_rate": 8.341254768951627e-06, "loss": 17.9316, "step": 15813 }, { "epoch": 0.28906721260533386, "grad_norm": 6.186792034925866, "learning_rate": 8.341034549478575e-06, "loss": 17.378, "step": 15814 }, { "epoch": 0.2890854918017804, "grad_norm": 6.568810237734127, "learning_rate": 8.340814318295488e-06, "loss": 17.3772, "step": 15815 }, { "epoch": 0.2891037709982269, "grad_norm": 7.044154342767684, "learning_rate": 8.340594075403137e-06, "loss": 17.8485, "step": 15816 }, { "epoch": 0.28912205019467346, "grad_norm": 5.307773036124306, "learning_rate": 8.340373820802292e-06, "loss": 17.0261, "step": 15817 }, { "epoch": 0.28914032939111994, "grad_norm": 6.701566476623037, "learning_rate": 8.340153554493727e-06, "loss": 17.7325, "step": 15818 }, { "epoch": 0.2891586085875665, "grad_norm": 7.622078532809152, "learning_rate": 8.339933276478215e-06, "loss": 17.7651, "step": 15819 }, { "epoch": 0.289176887784013, "grad_norm": 7.24709114490203, "learning_rate": 8.339712986756524e-06, "loss": 18.1042, "step": 15820 }, { "epoch": 0.28919516698045955, "grad_norm": 6.313386930704077, "learning_rate": 8.339492685329431e-06, "loss": 17.5091, "step": 15821 }, { "epoch": 0.2892134461769061, "grad_norm": 7.392391100618513, "learning_rate": 8.339272372197707e-06, "loss": 17.9825, "step": 15822 }, { "epoch": 0.28923172537335257, "grad_norm": 6.142106388509246, "learning_rate": 8.339052047362122e-06, "loss": 17.5634, "step": 15823 }, { "epoch": 0.2892500045697991, "grad_norm": 5.677965210339009, "learning_rate": 8.338831710823448e-06, "loss": 17.2084, "step": 15824 }, { "epoch": 0.28926828376624564, "grad_norm": 6.277885906577304, "learning_rate": 8.338611362582458e-06, "loss": 17.598, "step": 15825 }, { "epoch": 0.2892865629626922, "grad_norm": 6.359339975831886, "learning_rate": 8.338391002639927e-06, "loss": 17.3931, "step": 15826 }, { "epoch": 0.2893048421591387, "grad_norm": 6.134284777287734, "learning_rate": 8.338170630996625e-06, "loss": 17.5104, "step": 15827 }, { "epoch": 0.2893231213555852, "grad_norm": 7.572483903418803, "learning_rate": 8.337950247653323e-06, "loss": 17.9657, "step": 15828 }, { "epoch": 0.2893414005520317, "grad_norm": 6.421304801092773, "learning_rate": 8.337729852610797e-06, "loss": 17.6365, "step": 15829 }, { "epoch": 0.28935967974847826, "grad_norm": 8.723364308966541, "learning_rate": 8.337509445869818e-06, "loss": 18.4818, "step": 15830 }, { "epoch": 0.2893779589449248, "grad_norm": 6.2155569847895515, "learning_rate": 8.337289027431156e-06, "loss": 17.3669, "step": 15831 }, { "epoch": 0.28939623814137133, "grad_norm": 6.416649729972007, "learning_rate": 8.337068597295585e-06, "loss": 17.4181, "step": 15832 }, { "epoch": 0.2894145173378178, "grad_norm": 8.053614383827934, "learning_rate": 8.33684815546388e-06, "loss": 18.5884, "step": 15833 }, { "epoch": 0.28943279653426435, "grad_norm": 7.767476813720748, "learning_rate": 8.336627701936813e-06, "loss": 17.7285, "step": 15834 }, { "epoch": 0.2894510757307109, "grad_norm": 7.30343678621806, "learning_rate": 8.336407236715152e-06, "loss": 17.9958, "step": 15835 }, { "epoch": 0.2894693549271574, "grad_norm": 6.469256363024108, "learning_rate": 8.336186759799675e-06, "loss": 17.4907, "step": 15836 }, { "epoch": 0.28948763412360395, "grad_norm": 7.4002692280470175, "learning_rate": 8.335966271191154e-06, "loss": 17.884, "step": 15837 }, { "epoch": 0.28950591332005043, "grad_norm": 7.020862962918426, "learning_rate": 8.335745770890359e-06, "loss": 17.6649, "step": 15838 }, { "epoch": 0.28952419251649697, "grad_norm": 5.913564074879372, "learning_rate": 8.335525258898065e-06, "loss": 17.2542, "step": 15839 }, { "epoch": 0.2895424717129435, "grad_norm": 6.964672161097862, "learning_rate": 8.335304735215044e-06, "loss": 17.5509, "step": 15840 }, { "epoch": 0.28956075090939004, "grad_norm": 7.032762438823331, "learning_rate": 8.33508419984207e-06, "loss": 18.202, "step": 15841 }, { "epoch": 0.2895790301058365, "grad_norm": 7.0709440844314, "learning_rate": 8.334863652779914e-06, "loss": 17.6258, "step": 15842 }, { "epoch": 0.28959730930228306, "grad_norm": 7.833117486528488, "learning_rate": 8.334643094029354e-06, "loss": 17.5652, "step": 15843 }, { "epoch": 0.2896155884987296, "grad_norm": 7.707687027103323, "learning_rate": 8.334422523591154e-06, "loss": 17.7136, "step": 15844 }, { "epoch": 0.28963386769517613, "grad_norm": 8.420145673153188, "learning_rate": 8.334201941466096e-06, "loss": 18.1864, "step": 15845 }, { "epoch": 0.28965214689162266, "grad_norm": 5.708632640111591, "learning_rate": 8.333981347654947e-06, "loss": 17.0223, "step": 15846 }, { "epoch": 0.28967042608806914, "grad_norm": 5.904381181598544, "learning_rate": 8.333760742158485e-06, "loss": 17.3161, "step": 15847 }, { "epoch": 0.2896887052845157, "grad_norm": 6.625647036338524, "learning_rate": 8.333540124977482e-06, "loss": 17.3811, "step": 15848 }, { "epoch": 0.2897069844809622, "grad_norm": 8.40824991275657, "learning_rate": 8.333319496112707e-06, "loss": 18.5501, "step": 15849 }, { "epoch": 0.28972526367740875, "grad_norm": 5.080051963454226, "learning_rate": 8.333098855564938e-06, "loss": 16.8455, "step": 15850 }, { "epoch": 0.2897435428738553, "grad_norm": 6.884719320562312, "learning_rate": 8.332878203334946e-06, "loss": 17.5062, "step": 15851 }, { "epoch": 0.28976182207030177, "grad_norm": 8.473160270872027, "learning_rate": 8.332657539423505e-06, "loss": 17.6787, "step": 15852 }, { "epoch": 0.2897801012667483, "grad_norm": 6.920592389283835, "learning_rate": 8.33243686383139e-06, "loss": 17.5786, "step": 15853 }, { "epoch": 0.28979838046319484, "grad_norm": 7.619456747101325, "learning_rate": 8.332216176559371e-06, "loss": 18.207, "step": 15854 }, { "epoch": 0.2898166596596414, "grad_norm": 7.784304950394402, "learning_rate": 8.331995477608225e-06, "loss": 18.1824, "step": 15855 }, { "epoch": 0.2898349388560879, "grad_norm": 8.779377212254373, "learning_rate": 8.331774766978723e-06, "loss": 18.0307, "step": 15856 }, { "epoch": 0.2898532180525344, "grad_norm": 7.566250554103422, "learning_rate": 8.331554044671641e-06, "loss": 17.8003, "step": 15857 }, { "epoch": 0.2898714972489809, "grad_norm": 6.331108699259542, "learning_rate": 8.331333310687751e-06, "loss": 17.5381, "step": 15858 }, { "epoch": 0.28988977644542746, "grad_norm": 5.620430241750999, "learning_rate": 8.331112565027825e-06, "loss": 17.1739, "step": 15859 }, { "epoch": 0.289908055641874, "grad_norm": 7.172176970203313, "learning_rate": 8.33089180769264e-06, "loss": 17.8289, "step": 15860 }, { "epoch": 0.28992633483832053, "grad_norm": 6.1488761230885824, "learning_rate": 8.330671038682967e-06, "loss": 17.2673, "step": 15861 }, { "epoch": 0.289944614034767, "grad_norm": 6.707370241170042, "learning_rate": 8.330450257999582e-06, "loss": 17.4739, "step": 15862 }, { "epoch": 0.28996289323121355, "grad_norm": 7.522112889595386, "learning_rate": 8.330229465643257e-06, "loss": 17.9497, "step": 15863 }, { "epoch": 0.2899811724276601, "grad_norm": 7.0785085349866925, "learning_rate": 8.330008661614769e-06, "loss": 17.644, "step": 15864 }, { "epoch": 0.2899994516241066, "grad_norm": 5.762830020144023, "learning_rate": 8.329787845914888e-06, "loss": 17.4525, "step": 15865 }, { "epoch": 0.29001773082055315, "grad_norm": 6.201282452027288, "learning_rate": 8.32956701854439e-06, "loss": 17.4657, "step": 15866 }, { "epoch": 0.29003601001699963, "grad_norm": 6.9848531405467815, "learning_rate": 8.329346179504046e-06, "loss": 17.948, "step": 15867 }, { "epoch": 0.29005428921344617, "grad_norm": 6.343855021026234, "learning_rate": 8.329125328794635e-06, "loss": 17.1982, "step": 15868 }, { "epoch": 0.2900725684098927, "grad_norm": 7.5193987271791585, "learning_rate": 8.328904466416929e-06, "loss": 18.4442, "step": 15869 }, { "epoch": 0.29009084760633924, "grad_norm": 6.16276905751907, "learning_rate": 8.3286835923717e-06, "loss": 17.5355, "step": 15870 }, { "epoch": 0.2901091268027858, "grad_norm": 6.806165075539182, "learning_rate": 8.328462706659726e-06, "loss": 17.5512, "step": 15871 }, { "epoch": 0.29012740599923226, "grad_norm": 7.470899654380862, "learning_rate": 8.328241809281776e-06, "loss": 17.7824, "step": 15872 }, { "epoch": 0.2901456851956788, "grad_norm": 7.926610709436997, "learning_rate": 8.32802090023863e-06, "loss": 18.0969, "step": 15873 }, { "epoch": 0.29016396439212533, "grad_norm": 5.75081515714187, "learning_rate": 8.327799979531058e-06, "loss": 17.2268, "step": 15874 }, { "epoch": 0.29018224358857186, "grad_norm": 6.982746896800702, "learning_rate": 8.327579047159837e-06, "loss": 17.4291, "step": 15875 }, { "epoch": 0.29020052278501834, "grad_norm": 5.570328508642877, "learning_rate": 8.32735810312574e-06, "loss": 17.2382, "step": 15876 }, { "epoch": 0.2902188019814649, "grad_norm": 6.244755244437659, "learning_rate": 8.32713714742954e-06, "loss": 17.2208, "step": 15877 }, { "epoch": 0.2902370811779114, "grad_norm": 6.844615777860132, "learning_rate": 8.326916180072015e-06, "loss": 17.7951, "step": 15878 }, { "epoch": 0.29025536037435795, "grad_norm": 6.211093470232108, "learning_rate": 8.326695201053937e-06, "loss": 17.3398, "step": 15879 }, { "epoch": 0.2902736395708045, "grad_norm": 7.295018932536425, "learning_rate": 8.32647421037608e-06, "loss": 17.6137, "step": 15880 }, { "epoch": 0.29029191876725097, "grad_norm": 6.186315851716792, "learning_rate": 8.326253208039222e-06, "loss": 17.4425, "step": 15881 }, { "epoch": 0.2903101979636975, "grad_norm": 6.9896372933184345, "learning_rate": 8.326032194044132e-06, "loss": 17.6139, "step": 15882 }, { "epoch": 0.29032847716014404, "grad_norm": 5.796558153213868, "learning_rate": 8.325811168391589e-06, "loss": 17.2714, "step": 15883 }, { "epoch": 0.2903467563565906, "grad_norm": 5.973745328774716, "learning_rate": 8.325590131082367e-06, "loss": 17.4456, "step": 15884 }, { "epoch": 0.2903650355530371, "grad_norm": 7.042997243532951, "learning_rate": 8.32536908211724e-06, "loss": 17.5358, "step": 15885 }, { "epoch": 0.2903833147494836, "grad_norm": 6.664916227130399, "learning_rate": 8.325148021496982e-06, "loss": 17.671, "step": 15886 }, { "epoch": 0.2904015939459301, "grad_norm": 6.326458415158664, "learning_rate": 8.32492694922237e-06, "loss": 17.5055, "step": 15887 }, { "epoch": 0.29041987314237666, "grad_norm": 6.8060649565236435, "learning_rate": 8.324705865294178e-06, "loss": 17.7007, "step": 15888 }, { "epoch": 0.2904381523388232, "grad_norm": 6.149219705617901, "learning_rate": 8.324484769713179e-06, "loss": 17.4992, "step": 15889 }, { "epoch": 0.29045643153526973, "grad_norm": 6.0369480476305455, "learning_rate": 8.32426366248015e-06, "loss": 17.3001, "step": 15890 }, { "epoch": 0.2904747107317162, "grad_norm": 6.877425771103101, "learning_rate": 8.324042543595866e-06, "loss": 17.8269, "step": 15891 }, { "epoch": 0.29049298992816275, "grad_norm": 5.443113205173727, "learning_rate": 8.3238214130611e-06, "loss": 17.36, "step": 15892 }, { "epoch": 0.2905112691246093, "grad_norm": 6.024407802871284, "learning_rate": 8.323600270876628e-06, "loss": 17.4058, "step": 15893 }, { "epoch": 0.2905295483210558, "grad_norm": 6.59169659842555, "learning_rate": 8.323379117043226e-06, "loss": 17.6154, "step": 15894 }, { "epoch": 0.29054782751750236, "grad_norm": 8.11732513575511, "learning_rate": 8.32315795156167e-06, "loss": 18.2892, "step": 15895 }, { "epoch": 0.29056610671394884, "grad_norm": 6.8012147171376975, "learning_rate": 8.322936774432733e-06, "loss": 17.4193, "step": 15896 }, { "epoch": 0.29058438591039537, "grad_norm": 6.4210491553833124, "learning_rate": 8.322715585657191e-06, "loss": 17.922, "step": 15897 }, { "epoch": 0.2906026651068419, "grad_norm": 6.96120011220932, "learning_rate": 8.322494385235818e-06, "loss": 17.9317, "step": 15898 }, { "epoch": 0.29062094430328844, "grad_norm": 6.308022217355439, "learning_rate": 8.322273173169392e-06, "loss": 17.5599, "step": 15899 }, { "epoch": 0.290639223499735, "grad_norm": 6.59785364956779, "learning_rate": 8.322051949458686e-06, "loss": 17.4036, "step": 15900 }, { "epoch": 0.29065750269618146, "grad_norm": 6.912208280964955, "learning_rate": 8.321830714104476e-06, "loss": 17.7087, "step": 15901 }, { "epoch": 0.290675781892628, "grad_norm": 7.600567631357769, "learning_rate": 8.321609467107538e-06, "loss": 18.1136, "step": 15902 }, { "epoch": 0.29069406108907453, "grad_norm": 5.760471125677956, "learning_rate": 8.321388208468647e-06, "loss": 17.3006, "step": 15903 }, { "epoch": 0.29071234028552106, "grad_norm": 6.084253569765747, "learning_rate": 8.321166938188578e-06, "loss": 17.0782, "step": 15904 }, { "epoch": 0.2907306194819676, "grad_norm": 5.168974005331681, "learning_rate": 8.320945656268109e-06, "loss": 17.0572, "step": 15905 }, { "epoch": 0.2907488986784141, "grad_norm": 5.221581518003498, "learning_rate": 8.320724362708013e-06, "loss": 17.0476, "step": 15906 }, { "epoch": 0.2907671778748606, "grad_norm": 7.866374632689439, "learning_rate": 8.320503057509064e-06, "loss": 17.6504, "step": 15907 }, { "epoch": 0.29078545707130715, "grad_norm": 6.143672742519724, "learning_rate": 8.320281740672042e-06, "loss": 17.6234, "step": 15908 }, { "epoch": 0.2908037362677537, "grad_norm": 6.609740979769964, "learning_rate": 8.32006041219772e-06, "loss": 17.6301, "step": 15909 }, { "epoch": 0.29082201546420017, "grad_norm": 6.571069191355662, "learning_rate": 8.319839072086876e-06, "loss": 17.6319, "step": 15910 }, { "epoch": 0.2908402946606467, "grad_norm": 7.908554049421495, "learning_rate": 8.31961772034028e-06, "loss": 17.9841, "step": 15911 }, { "epoch": 0.29085857385709324, "grad_norm": 6.285597972319262, "learning_rate": 8.319396356958716e-06, "loss": 17.3774, "step": 15912 }, { "epoch": 0.2908768530535398, "grad_norm": 7.75251298088019, "learning_rate": 8.319174981942955e-06, "loss": 18.1743, "step": 15913 }, { "epoch": 0.2908951322499863, "grad_norm": 6.516258591895095, "learning_rate": 8.318953595293772e-06, "loss": 17.6466, "step": 15914 }, { "epoch": 0.2909134114464328, "grad_norm": 6.639266152220219, "learning_rate": 8.318732197011945e-06, "loss": 17.7367, "step": 15915 }, { "epoch": 0.2909316906428793, "grad_norm": 5.321623586949735, "learning_rate": 8.318510787098252e-06, "loss": 17.0874, "step": 15916 }, { "epoch": 0.29094996983932586, "grad_norm": 6.611776542798462, "learning_rate": 8.318289365553465e-06, "loss": 17.68, "step": 15917 }, { "epoch": 0.2909682490357724, "grad_norm": 7.1980416374132945, "learning_rate": 8.318067932378361e-06, "loss": 17.6493, "step": 15918 }, { "epoch": 0.29098652823221893, "grad_norm": 6.15059107590305, "learning_rate": 8.317846487573717e-06, "loss": 17.3689, "step": 15919 }, { "epoch": 0.2910048074286654, "grad_norm": 6.84114950642885, "learning_rate": 8.31762503114031e-06, "loss": 17.456, "step": 15920 }, { "epoch": 0.29102308662511195, "grad_norm": 5.920430701908309, "learning_rate": 8.317403563078915e-06, "loss": 17.1576, "step": 15921 }, { "epoch": 0.2910413658215585, "grad_norm": 6.375604895983556, "learning_rate": 8.317182083390307e-06, "loss": 17.4953, "step": 15922 }, { "epoch": 0.291059645018005, "grad_norm": 6.330529357697266, "learning_rate": 8.316960592075267e-06, "loss": 17.5169, "step": 15923 }, { "epoch": 0.29107792421445156, "grad_norm": 7.647725504781784, "learning_rate": 8.316739089134564e-06, "loss": 18.0041, "step": 15924 }, { "epoch": 0.29109620341089804, "grad_norm": 6.060455983108166, "learning_rate": 8.316517574568981e-06, "loss": 17.3912, "step": 15925 }, { "epoch": 0.29111448260734457, "grad_norm": 7.1133631092179135, "learning_rate": 8.31629604837929e-06, "loss": 17.7133, "step": 15926 }, { "epoch": 0.2911327618037911, "grad_norm": 4.92741934288932, "learning_rate": 8.31607451056627e-06, "loss": 16.7877, "step": 15927 }, { "epoch": 0.29115104100023764, "grad_norm": 5.807936016586364, "learning_rate": 8.315852961130697e-06, "loss": 17.3348, "step": 15928 }, { "epoch": 0.2911693201966842, "grad_norm": 6.720183569198894, "learning_rate": 8.315631400073346e-06, "loss": 17.5512, "step": 15929 }, { "epoch": 0.29118759939313066, "grad_norm": 6.337216179019596, "learning_rate": 8.315409827394996e-06, "loss": 17.5489, "step": 15930 }, { "epoch": 0.2912058785895772, "grad_norm": 8.794982143667239, "learning_rate": 8.315188243096421e-06, "loss": 19.0212, "step": 15931 }, { "epoch": 0.29122415778602373, "grad_norm": 7.48505748242081, "learning_rate": 8.3149666471784e-06, "loss": 17.9526, "step": 15932 }, { "epoch": 0.29124243698247027, "grad_norm": 5.8641357013429, "learning_rate": 8.314745039641708e-06, "loss": 17.4658, "step": 15933 }, { "epoch": 0.2912607161789168, "grad_norm": 6.536095360010347, "learning_rate": 8.314523420487122e-06, "loss": 17.5087, "step": 15934 }, { "epoch": 0.2912789953753633, "grad_norm": 6.992953286405548, "learning_rate": 8.314301789715419e-06, "loss": 17.6882, "step": 15935 }, { "epoch": 0.2912972745718098, "grad_norm": 5.702328158462788, "learning_rate": 8.314080147327376e-06, "loss": 17.1967, "step": 15936 }, { "epoch": 0.29131555376825635, "grad_norm": 6.203032349576431, "learning_rate": 8.31385849332377e-06, "loss": 17.5671, "step": 15937 }, { "epoch": 0.2913338329647029, "grad_norm": 7.527407895431283, "learning_rate": 8.313636827705376e-06, "loss": 17.6928, "step": 15938 }, { "epoch": 0.2913521121611494, "grad_norm": 6.583331630723015, "learning_rate": 8.313415150472974e-06, "loss": 17.6124, "step": 15939 }, { "epoch": 0.2913703913575959, "grad_norm": 5.763251944549388, "learning_rate": 8.31319346162734e-06, "loss": 17.0309, "step": 15940 }, { "epoch": 0.29138867055404244, "grad_norm": 5.955730027311099, "learning_rate": 8.31297176116925e-06, "loss": 17.1212, "step": 15941 }, { "epoch": 0.291406949750489, "grad_norm": 9.129691374230369, "learning_rate": 8.31275004909948e-06, "loss": 18.2103, "step": 15942 }, { "epoch": 0.2914252289469355, "grad_norm": 5.801987910334633, "learning_rate": 8.31252832541881e-06, "loss": 17.2921, "step": 15943 }, { "epoch": 0.291443508143382, "grad_norm": 5.2325612262252825, "learning_rate": 8.312306590128015e-06, "loss": 16.7652, "step": 15944 }, { "epoch": 0.2914617873398285, "grad_norm": 6.368534594401016, "learning_rate": 8.312084843227873e-06, "loss": 17.443, "step": 15945 }, { "epoch": 0.29148006653627506, "grad_norm": 6.093266259069604, "learning_rate": 8.311863084719161e-06, "loss": 17.4423, "step": 15946 }, { "epoch": 0.2914983457327216, "grad_norm": 7.184285949649789, "learning_rate": 8.311641314602657e-06, "loss": 17.6637, "step": 15947 }, { "epoch": 0.29151662492916813, "grad_norm": 6.639613039608739, "learning_rate": 8.311419532879137e-06, "loss": 17.3799, "step": 15948 }, { "epoch": 0.2915349041256146, "grad_norm": 6.936285137221981, "learning_rate": 8.311197739549378e-06, "loss": 18.0556, "step": 15949 }, { "epoch": 0.29155318332206115, "grad_norm": 7.292562960656055, "learning_rate": 8.31097593461416e-06, "loss": 18.0502, "step": 15950 }, { "epoch": 0.2915714625185077, "grad_norm": 7.423271528676154, "learning_rate": 8.310754118074258e-06, "loss": 17.7295, "step": 15951 }, { "epoch": 0.2915897417149542, "grad_norm": 6.857453059461951, "learning_rate": 8.310532289930449e-06, "loss": 17.9657, "step": 15952 }, { "epoch": 0.29160802091140076, "grad_norm": 6.849705261001428, "learning_rate": 8.310310450183512e-06, "loss": 17.6968, "step": 15953 }, { "epoch": 0.29162630010784724, "grad_norm": 6.846373698840621, "learning_rate": 8.310088598834226e-06, "loss": 17.7055, "step": 15954 }, { "epoch": 0.29164457930429377, "grad_norm": 5.983680416740459, "learning_rate": 8.309866735883365e-06, "loss": 17.2364, "step": 15955 }, { "epoch": 0.2916628585007403, "grad_norm": 7.717862622944557, "learning_rate": 8.309644861331707e-06, "loss": 18.4288, "step": 15956 }, { "epoch": 0.29168113769718684, "grad_norm": 6.166111719059242, "learning_rate": 8.309422975180036e-06, "loss": 17.4886, "step": 15957 }, { "epoch": 0.2916994168936334, "grad_norm": 6.259664873126532, "learning_rate": 8.30920107742912e-06, "loss": 17.6488, "step": 15958 }, { "epoch": 0.29171769609007986, "grad_norm": 5.640824133826954, "learning_rate": 8.308979168079742e-06, "loss": 17.5109, "step": 15959 }, { "epoch": 0.2917359752865264, "grad_norm": 5.6084383085018, "learning_rate": 8.308757247132679e-06, "loss": 17.3469, "step": 15960 }, { "epoch": 0.29175425448297293, "grad_norm": 5.7750588523022515, "learning_rate": 8.30853531458871e-06, "loss": 17.3453, "step": 15961 }, { "epoch": 0.29177253367941947, "grad_norm": 7.083811626591957, "learning_rate": 8.308313370448611e-06, "loss": 17.7419, "step": 15962 }, { "epoch": 0.291790812875866, "grad_norm": 7.035315389410472, "learning_rate": 8.308091414713162e-06, "loss": 17.8954, "step": 15963 }, { "epoch": 0.2918090920723125, "grad_norm": 6.371758361429908, "learning_rate": 8.307869447383139e-06, "loss": 17.5146, "step": 15964 }, { "epoch": 0.291827371268759, "grad_norm": 4.803194078634297, "learning_rate": 8.30764746845932e-06, "loss": 16.8787, "step": 15965 }, { "epoch": 0.29184565046520555, "grad_norm": 6.802235942204773, "learning_rate": 8.307425477942485e-06, "loss": 17.6656, "step": 15966 }, { "epoch": 0.2918639296616521, "grad_norm": 7.262053207815283, "learning_rate": 8.30720347583341e-06, "loss": 18.0788, "step": 15967 }, { "epoch": 0.2918822088580986, "grad_norm": 7.844899505760226, "learning_rate": 8.306981462132873e-06, "loss": 18.1277, "step": 15968 }, { "epoch": 0.2919004880545451, "grad_norm": 6.267639267917648, "learning_rate": 8.306759436841653e-06, "loss": 17.4688, "step": 15969 }, { "epoch": 0.29191876725099164, "grad_norm": 6.941059769945796, "learning_rate": 8.306537399960528e-06, "loss": 17.8929, "step": 15970 }, { "epoch": 0.2919370464474382, "grad_norm": 7.0941521241078, "learning_rate": 8.306315351490279e-06, "loss": 17.9915, "step": 15971 }, { "epoch": 0.2919553256438847, "grad_norm": 6.878623015133455, "learning_rate": 8.30609329143168e-06, "loss": 17.9059, "step": 15972 }, { "epoch": 0.29197360484033125, "grad_norm": 5.563252523447362, "learning_rate": 8.305871219785509e-06, "loss": 17.025, "step": 15973 }, { "epoch": 0.2919918840367777, "grad_norm": 5.711905719455599, "learning_rate": 8.30564913655255e-06, "loss": 17.2821, "step": 15974 }, { "epoch": 0.29201016323322426, "grad_norm": 6.946104489699604, "learning_rate": 8.305427041733573e-06, "loss": 17.7724, "step": 15975 }, { "epoch": 0.2920284424296708, "grad_norm": 5.92126910155695, "learning_rate": 8.305204935329365e-06, "loss": 17.3468, "step": 15976 }, { "epoch": 0.29204672162611733, "grad_norm": 9.455120274795474, "learning_rate": 8.304982817340699e-06, "loss": 18.5487, "step": 15977 }, { "epoch": 0.2920650008225638, "grad_norm": 6.930638307766184, "learning_rate": 8.304760687768355e-06, "loss": 17.6471, "step": 15978 }, { "epoch": 0.29208328001901035, "grad_norm": 7.163525702393523, "learning_rate": 8.304538546613111e-06, "loss": 18.0668, "step": 15979 }, { "epoch": 0.2921015592154569, "grad_norm": 6.304269962437889, "learning_rate": 8.304316393875746e-06, "loss": 17.4759, "step": 15980 }, { "epoch": 0.2921198384119034, "grad_norm": 6.6073744214449395, "learning_rate": 8.304094229557041e-06, "loss": 17.7584, "step": 15981 }, { "epoch": 0.29213811760834996, "grad_norm": 6.917237855040101, "learning_rate": 8.30387205365777e-06, "loss": 17.6626, "step": 15982 }, { "epoch": 0.29215639680479644, "grad_norm": 7.831925164171523, "learning_rate": 8.303649866178716e-06, "loss": 18.0621, "step": 15983 }, { "epoch": 0.29217467600124297, "grad_norm": 7.766589484273645, "learning_rate": 8.303427667120655e-06, "loss": 18.0887, "step": 15984 }, { "epoch": 0.2921929551976895, "grad_norm": 7.177838681952092, "learning_rate": 8.303205456484367e-06, "loss": 17.45, "step": 15985 }, { "epoch": 0.29221123439413604, "grad_norm": 6.036841186751471, "learning_rate": 8.30298323427063e-06, "loss": 17.4499, "step": 15986 }, { "epoch": 0.2922295135905826, "grad_norm": 7.082155768465045, "learning_rate": 8.302761000480223e-06, "loss": 17.826, "step": 15987 }, { "epoch": 0.29224779278702906, "grad_norm": 6.580597836497529, "learning_rate": 8.302538755113927e-06, "loss": 17.5262, "step": 15988 }, { "epoch": 0.2922660719834756, "grad_norm": 8.288081416192268, "learning_rate": 8.302316498172518e-06, "loss": 17.7942, "step": 15989 }, { "epoch": 0.29228435117992213, "grad_norm": 6.33771745536431, "learning_rate": 8.302094229656776e-06, "loss": 17.4197, "step": 15990 }, { "epoch": 0.29230263037636867, "grad_norm": 7.04313423853878, "learning_rate": 8.30187194956748e-06, "loss": 17.6455, "step": 15991 }, { "epoch": 0.2923209095728152, "grad_norm": 5.712160325010722, "learning_rate": 8.30164965790541e-06, "loss": 17.2575, "step": 15992 }, { "epoch": 0.2923391887692617, "grad_norm": 7.276121630883649, "learning_rate": 8.301427354671345e-06, "loss": 18.0208, "step": 15993 }, { "epoch": 0.2923574679657082, "grad_norm": 6.383029706669919, "learning_rate": 8.301205039866063e-06, "loss": 17.774, "step": 15994 }, { "epoch": 0.29237574716215475, "grad_norm": 6.627319589607053, "learning_rate": 8.300982713490344e-06, "loss": 17.5862, "step": 15995 }, { "epoch": 0.2923940263586013, "grad_norm": 7.220860590953939, "learning_rate": 8.300760375544967e-06, "loss": 17.9415, "step": 15996 }, { "epoch": 0.2924123055550478, "grad_norm": 6.978643408968633, "learning_rate": 8.300538026030712e-06, "loss": 17.569, "step": 15997 }, { "epoch": 0.2924305847514943, "grad_norm": 6.531278179387211, "learning_rate": 8.300315664948355e-06, "loss": 17.5871, "step": 15998 }, { "epoch": 0.29244886394794084, "grad_norm": 6.357891325954263, "learning_rate": 8.300093292298681e-06, "loss": 17.3975, "step": 15999 }, { "epoch": 0.2924671431443874, "grad_norm": 8.095016958306415, "learning_rate": 8.299870908082465e-06, "loss": 18.3559, "step": 16000 }, { "epoch": 0.2924854223408339, "grad_norm": 7.719323042506764, "learning_rate": 8.299648512300487e-06, "loss": 18.284, "step": 16001 }, { "epoch": 0.29250370153728045, "grad_norm": 7.570844006464962, "learning_rate": 8.29942610495353e-06, "loss": 17.994, "step": 16002 }, { "epoch": 0.2925219807337269, "grad_norm": 7.601757116027508, "learning_rate": 8.299203686042367e-06, "loss": 17.9397, "step": 16003 }, { "epoch": 0.29254025993017346, "grad_norm": 6.740646743610164, "learning_rate": 8.298981255567785e-06, "loss": 17.8283, "step": 16004 }, { "epoch": 0.29255853912662, "grad_norm": 6.308900735552201, "learning_rate": 8.298758813530559e-06, "loss": 17.4854, "step": 16005 }, { "epoch": 0.29257681832306653, "grad_norm": 7.362631597178887, "learning_rate": 8.298536359931469e-06, "loss": 18.0763, "step": 16006 }, { "epoch": 0.29259509751951307, "grad_norm": 5.7634680549419235, "learning_rate": 8.298313894771294e-06, "loss": 17.2389, "step": 16007 }, { "epoch": 0.29261337671595955, "grad_norm": 6.239958089881529, "learning_rate": 8.298091418050817e-06, "loss": 17.3514, "step": 16008 }, { "epoch": 0.2926316559124061, "grad_norm": 9.225766618544824, "learning_rate": 8.297868929770815e-06, "loss": 18.6185, "step": 16009 }, { "epoch": 0.2926499351088526, "grad_norm": 5.553300930458291, "learning_rate": 8.29764642993207e-06, "loss": 17.0238, "step": 16010 }, { "epoch": 0.29266821430529916, "grad_norm": 8.737045883777931, "learning_rate": 8.29742391853536e-06, "loss": 17.7353, "step": 16011 }, { "epoch": 0.29268649350174564, "grad_norm": 8.113213857813278, "learning_rate": 8.297201395581463e-06, "loss": 18.0144, "step": 16012 }, { "epoch": 0.2927047726981922, "grad_norm": 6.930256829166329, "learning_rate": 8.296978861071163e-06, "loss": 17.8505, "step": 16013 }, { "epoch": 0.2927230518946387, "grad_norm": 5.76729045647949, "learning_rate": 8.296756315005237e-06, "loss": 17.3446, "step": 16014 }, { "epoch": 0.29274133109108524, "grad_norm": 6.852138600941832, "learning_rate": 8.296533757384467e-06, "loss": 17.7116, "step": 16015 }, { "epoch": 0.2927596102875318, "grad_norm": 7.1098600478062535, "learning_rate": 8.296311188209634e-06, "loss": 17.5953, "step": 16016 }, { "epoch": 0.29277788948397826, "grad_norm": 6.152991216930885, "learning_rate": 8.296088607481514e-06, "loss": 17.6729, "step": 16017 }, { "epoch": 0.2927961686804248, "grad_norm": 6.699902350562617, "learning_rate": 8.295866015200889e-06, "loss": 17.5917, "step": 16018 }, { "epoch": 0.29281444787687133, "grad_norm": 6.718593645829086, "learning_rate": 8.29564341136854e-06, "loss": 17.4667, "step": 16019 }, { "epoch": 0.29283272707331787, "grad_norm": 6.238437528064015, "learning_rate": 8.29542079598525e-06, "loss": 17.4897, "step": 16020 }, { "epoch": 0.2928510062697644, "grad_norm": 7.803161615971059, "learning_rate": 8.295198169051792e-06, "loss": 17.9566, "step": 16021 }, { "epoch": 0.2928692854662109, "grad_norm": 5.851969317334456, "learning_rate": 8.294975530568952e-06, "loss": 17.2075, "step": 16022 }, { "epoch": 0.2928875646626574, "grad_norm": 6.903866718538288, "learning_rate": 8.29475288053751e-06, "loss": 17.684, "step": 16023 }, { "epoch": 0.29290584385910395, "grad_norm": 9.073972148485757, "learning_rate": 8.294530218958243e-06, "loss": 18.4389, "step": 16024 }, { "epoch": 0.2929241230555505, "grad_norm": 5.92501062690928, "learning_rate": 8.294307545831935e-06, "loss": 17.5402, "step": 16025 }, { "epoch": 0.292942402251997, "grad_norm": 5.2315268846003695, "learning_rate": 8.294084861159363e-06, "loss": 16.9985, "step": 16026 }, { "epoch": 0.2929606814484435, "grad_norm": 6.86822506179636, "learning_rate": 8.293862164941311e-06, "loss": 17.5747, "step": 16027 }, { "epoch": 0.29297896064489004, "grad_norm": 5.756534441651972, "learning_rate": 8.293639457178557e-06, "loss": 17.2154, "step": 16028 }, { "epoch": 0.2929972398413366, "grad_norm": 8.803201480989898, "learning_rate": 8.293416737871882e-06, "loss": 18.1632, "step": 16029 }, { "epoch": 0.2930155190377831, "grad_norm": 7.056439052161994, "learning_rate": 8.29319400702207e-06, "loss": 18.0589, "step": 16030 }, { "epoch": 0.29303379823422965, "grad_norm": 7.8384031209301375, "learning_rate": 8.292971264629895e-06, "loss": 18.3924, "step": 16031 }, { "epoch": 0.29305207743067613, "grad_norm": 5.30923451141389, "learning_rate": 8.292748510696144e-06, "loss": 17.0437, "step": 16032 }, { "epoch": 0.29307035662712266, "grad_norm": 5.420403371338054, "learning_rate": 8.292525745221595e-06, "loss": 17.1702, "step": 16033 }, { "epoch": 0.2930886358235692, "grad_norm": 8.135822152292993, "learning_rate": 8.292302968207028e-06, "loss": 17.872, "step": 16034 }, { "epoch": 0.29310691502001573, "grad_norm": 4.77519323958378, "learning_rate": 8.292080179653225e-06, "loss": 16.8895, "step": 16035 }, { "epoch": 0.29312519421646227, "grad_norm": 6.505618821002592, "learning_rate": 8.291857379560968e-06, "loss": 17.8212, "step": 16036 }, { "epoch": 0.29314347341290875, "grad_norm": 7.792316429728093, "learning_rate": 8.291634567931036e-06, "loss": 17.7504, "step": 16037 }, { "epoch": 0.2931617526093553, "grad_norm": 6.299430618102456, "learning_rate": 8.291411744764209e-06, "loss": 17.4734, "step": 16038 }, { "epoch": 0.2931800318058018, "grad_norm": 7.2576532554125235, "learning_rate": 8.29118891006127e-06, "loss": 17.9509, "step": 16039 }, { "epoch": 0.29319831100224836, "grad_norm": 8.165665908077704, "learning_rate": 8.290966063823e-06, "loss": 18.0751, "step": 16040 }, { "epoch": 0.2932165901986949, "grad_norm": 7.610675332319149, "learning_rate": 8.29074320605018e-06, "loss": 17.8204, "step": 16041 }, { "epoch": 0.2932348693951414, "grad_norm": 6.104090895652702, "learning_rate": 8.290520336743589e-06, "loss": 17.5176, "step": 16042 }, { "epoch": 0.2932531485915879, "grad_norm": 7.843404621359061, "learning_rate": 8.290297455904011e-06, "loss": 18.1972, "step": 16043 }, { "epoch": 0.29327142778803444, "grad_norm": 8.210727392862546, "learning_rate": 8.290074563532227e-06, "loss": 18.4069, "step": 16044 }, { "epoch": 0.293289706984481, "grad_norm": 5.698596075513861, "learning_rate": 8.289851659629014e-06, "loss": 17.1512, "step": 16045 }, { "epoch": 0.29330798618092746, "grad_norm": 7.1254844095808565, "learning_rate": 8.28962874419516e-06, "loss": 17.9916, "step": 16046 }, { "epoch": 0.293326265377374, "grad_norm": 6.408700500381423, "learning_rate": 8.289405817231439e-06, "loss": 17.7296, "step": 16047 }, { "epoch": 0.29334454457382053, "grad_norm": 6.294001819723074, "learning_rate": 8.28918287873864e-06, "loss": 17.5453, "step": 16048 }, { "epoch": 0.29336282377026707, "grad_norm": 8.891507157521445, "learning_rate": 8.288959928717538e-06, "loss": 17.6735, "step": 16049 }, { "epoch": 0.2933811029667136, "grad_norm": 6.445046404182921, "learning_rate": 8.288736967168917e-06, "loss": 17.4465, "step": 16050 }, { "epoch": 0.2933993821631601, "grad_norm": 6.512188189130023, "learning_rate": 8.288513994093558e-06, "loss": 18.1263, "step": 16051 }, { "epoch": 0.2934176613596066, "grad_norm": 6.035160251880754, "learning_rate": 8.288291009492245e-06, "loss": 17.4996, "step": 16052 }, { "epoch": 0.29343594055605315, "grad_norm": 5.99175988250994, "learning_rate": 8.288068013365755e-06, "loss": 17.3569, "step": 16053 }, { "epoch": 0.2934542197524997, "grad_norm": 6.9243016139231015, "learning_rate": 8.287845005714872e-06, "loss": 17.4461, "step": 16054 }, { "epoch": 0.2934724989489462, "grad_norm": 7.028727291165785, "learning_rate": 8.287621986540379e-06, "loss": 17.3805, "step": 16055 }, { "epoch": 0.2934907781453927, "grad_norm": 4.964826885366487, "learning_rate": 8.287398955843056e-06, "loss": 16.9784, "step": 16056 }, { "epoch": 0.29350905734183924, "grad_norm": 5.559467036698685, "learning_rate": 8.287175913623683e-06, "loss": 17.0553, "step": 16057 }, { "epoch": 0.2935273365382858, "grad_norm": 5.561341548724766, "learning_rate": 8.286952859883046e-06, "loss": 17.0916, "step": 16058 }, { "epoch": 0.2935456157347323, "grad_norm": 8.510322733422155, "learning_rate": 8.286729794621924e-06, "loss": 18.8003, "step": 16059 }, { "epoch": 0.29356389493117885, "grad_norm": 7.407605325077076, "learning_rate": 8.286506717841098e-06, "loss": 17.7896, "step": 16060 }, { "epoch": 0.29358217412762533, "grad_norm": 6.351108811363999, "learning_rate": 8.286283629541354e-06, "loss": 17.6701, "step": 16061 }, { "epoch": 0.29360045332407186, "grad_norm": 6.637702791225121, "learning_rate": 8.286060529723467e-06, "loss": 17.6626, "step": 16062 }, { "epoch": 0.2936187325205184, "grad_norm": 6.9148735357227755, "learning_rate": 8.285837418388225e-06, "loss": 17.7434, "step": 16063 }, { "epoch": 0.29363701171696494, "grad_norm": 6.5891439576524755, "learning_rate": 8.285614295536408e-06, "loss": 17.6507, "step": 16064 }, { "epoch": 0.29365529091341147, "grad_norm": 7.0251678746844615, "learning_rate": 8.285391161168798e-06, "loss": 17.5474, "step": 16065 }, { "epoch": 0.29367357010985795, "grad_norm": 7.031143782963527, "learning_rate": 8.285168015286177e-06, "loss": 17.8143, "step": 16066 }, { "epoch": 0.2936918493063045, "grad_norm": 7.893692081698446, "learning_rate": 8.284944857889327e-06, "loss": 18.5228, "step": 16067 }, { "epoch": 0.293710128502751, "grad_norm": 6.516554887842644, "learning_rate": 8.284721688979032e-06, "loss": 17.3555, "step": 16068 }, { "epoch": 0.29372840769919756, "grad_norm": 6.303948742692822, "learning_rate": 8.284498508556072e-06, "loss": 17.4553, "step": 16069 }, { "epoch": 0.2937466868956441, "grad_norm": 8.522657992212642, "learning_rate": 8.284275316621227e-06, "loss": 18.4694, "step": 16070 }, { "epoch": 0.2937649660920906, "grad_norm": 6.756765906445806, "learning_rate": 8.284052113175285e-06, "loss": 17.8483, "step": 16071 }, { "epoch": 0.2937832452885371, "grad_norm": 6.263059521885723, "learning_rate": 8.283828898219025e-06, "loss": 17.4964, "step": 16072 }, { "epoch": 0.29380152448498364, "grad_norm": 6.480726018118758, "learning_rate": 8.283605671753228e-06, "loss": 17.6313, "step": 16073 }, { "epoch": 0.2938198036814302, "grad_norm": 6.316229642215812, "learning_rate": 8.283382433778678e-06, "loss": 17.733, "step": 16074 }, { "epoch": 0.2938380828778767, "grad_norm": 5.5538384810390005, "learning_rate": 8.283159184296158e-06, "loss": 16.9584, "step": 16075 }, { "epoch": 0.2938563620743232, "grad_norm": 6.588095026098765, "learning_rate": 8.282935923306452e-06, "loss": 17.619, "step": 16076 }, { "epoch": 0.29387464127076973, "grad_norm": 6.07862577927372, "learning_rate": 8.282712650810339e-06, "loss": 17.302, "step": 16077 }, { "epoch": 0.29389292046721627, "grad_norm": 7.78484540887961, "learning_rate": 8.282489366808603e-06, "loss": 18.1963, "step": 16078 }, { "epoch": 0.2939111996636628, "grad_norm": 6.954039498244128, "learning_rate": 8.282266071302025e-06, "loss": 17.9513, "step": 16079 }, { "epoch": 0.2939294788601093, "grad_norm": 6.391492039911019, "learning_rate": 8.282042764291392e-06, "loss": 17.5255, "step": 16080 }, { "epoch": 0.2939477580565558, "grad_norm": 6.943123103228119, "learning_rate": 8.281819445777483e-06, "loss": 17.8471, "step": 16081 }, { "epoch": 0.29396603725300235, "grad_norm": 4.913061747853871, "learning_rate": 8.281596115761082e-06, "loss": 16.976, "step": 16082 }, { "epoch": 0.2939843164494489, "grad_norm": 6.9758949576322165, "learning_rate": 8.281372774242968e-06, "loss": 17.8159, "step": 16083 }, { "epoch": 0.2940025956458954, "grad_norm": 6.624762103518529, "learning_rate": 8.28114942122393e-06, "loss": 17.527, "step": 16084 }, { "epoch": 0.2940208748423419, "grad_norm": 7.075738927009336, "learning_rate": 8.28092605670475e-06, "loss": 17.815, "step": 16085 }, { "epoch": 0.29403915403878844, "grad_norm": 6.632446800746286, "learning_rate": 8.280702680686206e-06, "loss": 17.7209, "step": 16086 }, { "epoch": 0.294057433235235, "grad_norm": 7.477268407655306, "learning_rate": 8.280479293169083e-06, "loss": 17.8343, "step": 16087 }, { "epoch": 0.2940757124316815, "grad_norm": 7.735048943739937, "learning_rate": 8.280255894154167e-06, "loss": 17.6479, "step": 16088 }, { "epoch": 0.29409399162812805, "grad_norm": 6.944062305694892, "learning_rate": 8.280032483642238e-06, "loss": 18.0583, "step": 16089 }, { "epoch": 0.29411227082457453, "grad_norm": 6.292512509475764, "learning_rate": 8.27980906163408e-06, "loss": 17.3399, "step": 16090 }, { "epoch": 0.29413055002102106, "grad_norm": 6.856975000258546, "learning_rate": 8.279585628130476e-06, "loss": 17.7249, "step": 16091 }, { "epoch": 0.2941488292174676, "grad_norm": 6.587628718956014, "learning_rate": 8.279362183132208e-06, "loss": 17.5801, "step": 16092 }, { "epoch": 0.29416710841391414, "grad_norm": 6.10922530217729, "learning_rate": 8.27913872664006e-06, "loss": 17.3902, "step": 16093 }, { "epoch": 0.29418538761036067, "grad_norm": 5.769721979891175, "learning_rate": 8.278915258654816e-06, "loss": 17.4178, "step": 16094 }, { "epoch": 0.29420366680680715, "grad_norm": 8.461191822315362, "learning_rate": 8.27869177917726e-06, "loss": 18.7004, "step": 16095 }, { "epoch": 0.2942219460032537, "grad_norm": 5.960529254388798, "learning_rate": 8.278468288208173e-06, "loss": 17.2166, "step": 16096 }, { "epoch": 0.2942402251997002, "grad_norm": 6.0029111031728135, "learning_rate": 8.278244785748337e-06, "loss": 17.3049, "step": 16097 }, { "epoch": 0.29425850439614676, "grad_norm": 7.2388768396319, "learning_rate": 8.27802127179854e-06, "loss": 17.8213, "step": 16098 }, { "epoch": 0.2942767835925933, "grad_norm": 6.964455211596097, "learning_rate": 8.277797746359562e-06, "loss": 17.8663, "step": 16099 }, { "epoch": 0.2942950627890398, "grad_norm": 7.143780228227318, "learning_rate": 8.277574209432187e-06, "loss": 17.6374, "step": 16100 }, { "epoch": 0.2943133419854863, "grad_norm": 7.404051063205473, "learning_rate": 8.277350661017198e-06, "loss": 18.0763, "step": 16101 }, { "epoch": 0.29433162118193285, "grad_norm": 5.681771181830303, "learning_rate": 8.277127101115381e-06, "loss": 17.1563, "step": 16102 }, { "epoch": 0.2943499003783794, "grad_norm": 6.541284403697504, "learning_rate": 8.276903529727517e-06, "loss": 17.4682, "step": 16103 }, { "epoch": 0.2943681795748259, "grad_norm": 6.463094141425445, "learning_rate": 8.276679946854392e-06, "loss": 17.3894, "step": 16104 }, { "epoch": 0.2943864587712724, "grad_norm": 7.1175319393264305, "learning_rate": 8.276456352496785e-06, "loss": 17.7138, "step": 16105 }, { "epoch": 0.29440473796771893, "grad_norm": 7.0808893139446605, "learning_rate": 8.276232746655485e-06, "loss": 17.5719, "step": 16106 }, { "epoch": 0.29442301716416547, "grad_norm": 5.617139148213048, "learning_rate": 8.276009129331273e-06, "loss": 17.3385, "step": 16107 }, { "epoch": 0.294441296360612, "grad_norm": 6.638985159144182, "learning_rate": 8.275785500524933e-06, "loss": 17.6396, "step": 16108 }, { "epoch": 0.29445957555705854, "grad_norm": 6.126398449831048, "learning_rate": 8.275561860237252e-06, "loss": 17.4253, "step": 16109 }, { "epoch": 0.294477854753505, "grad_norm": 7.9825261159233625, "learning_rate": 8.275338208469007e-06, "loss": 18.7895, "step": 16110 }, { "epoch": 0.29449613394995156, "grad_norm": 6.348114834009812, "learning_rate": 8.275114545220986e-06, "loss": 17.5388, "step": 16111 }, { "epoch": 0.2945144131463981, "grad_norm": 6.363968832816221, "learning_rate": 8.274890870493975e-06, "loss": 17.4444, "step": 16112 }, { "epoch": 0.2945326923428446, "grad_norm": 6.605263007879188, "learning_rate": 8.274667184288755e-06, "loss": 17.6027, "step": 16113 }, { "epoch": 0.2945509715392911, "grad_norm": 5.957730997015338, "learning_rate": 8.27444348660611e-06, "loss": 17.288, "step": 16114 }, { "epoch": 0.29456925073573764, "grad_norm": 7.155158853752062, "learning_rate": 8.274219777446826e-06, "loss": 18.0591, "step": 16115 }, { "epoch": 0.2945875299321842, "grad_norm": 5.690349804479167, "learning_rate": 8.273996056811684e-06, "loss": 17.1678, "step": 16116 }, { "epoch": 0.2946058091286307, "grad_norm": 8.759875273515886, "learning_rate": 8.27377232470147e-06, "loss": 18.4531, "step": 16117 }, { "epoch": 0.29462408832507725, "grad_norm": 7.071037803884969, "learning_rate": 8.27354858111697e-06, "loss": 17.9133, "step": 16118 }, { "epoch": 0.29464236752152373, "grad_norm": 5.211525432522588, "learning_rate": 8.273324826058966e-06, "loss": 17.1834, "step": 16119 }, { "epoch": 0.29466064671797026, "grad_norm": 6.635936234020447, "learning_rate": 8.273101059528242e-06, "loss": 17.4478, "step": 16120 }, { "epoch": 0.2946789259144168, "grad_norm": 5.075378179443972, "learning_rate": 8.272877281525581e-06, "loss": 16.7821, "step": 16121 }, { "epoch": 0.29469720511086334, "grad_norm": 7.059489589531912, "learning_rate": 8.27265349205177e-06, "loss": 18.1215, "step": 16122 }, { "epoch": 0.29471548430730987, "grad_norm": 9.00772735041651, "learning_rate": 8.272429691107595e-06, "loss": 17.2768, "step": 16123 }, { "epoch": 0.29473376350375635, "grad_norm": 6.863778251720272, "learning_rate": 8.272205878693835e-06, "loss": 18.007, "step": 16124 }, { "epoch": 0.2947520427002029, "grad_norm": 5.685395242343081, "learning_rate": 8.271982054811279e-06, "loss": 17.2372, "step": 16125 }, { "epoch": 0.2947703218966494, "grad_norm": 7.226228088489144, "learning_rate": 8.271758219460708e-06, "loss": 17.967, "step": 16126 }, { "epoch": 0.29478860109309596, "grad_norm": 6.642752145506515, "learning_rate": 8.27153437264291e-06, "loss": 17.7786, "step": 16127 }, { "epoch": 0.2948068802895425, "grad_norm": 7.521831132525992, "learning_rate": 8.271310514358667e-06, "loss": 18.4123, "step": 16128 }, { "epoch": 0.294825159485989, "grad_norm": 7.055337495464242, "learning_rate": 8.271086644608766e-06, "loss": 17.9119, "step": 16129 }, { "epoch": 0.2948434386824355, "grad_norm": 6.715668028490851, "learning_rate": 8.27086276339399e-06, "loss": 17.9008, "step": 16130 }, { "epoch": 0.29486171787888205, "grad_norm": 6.226074771096643, "learning_rate": 8.270638870715122e-06, "loss": 17.6105, "step": 16131 }, { "epoch": 0.2948799970753286, "grad_norm": 7.970005508418957, "learning_rate": 8.27041496657295e-06, "loss": 18.0794, "step": 16132 }, { "epoch": 0.2948982762717751, "grad_norm": 7.312806125808853, "learning_rate": 8.270191050968257e-06, "loss": 17.7882, "step": 16133 }, { "epoch": 0.2949165554682216, "grad_norm": 7.343051756783496, "learning_rate": 8.269967123901828e-06, "loss": 17.7306, "step": 16134 }, { "epoch": 0.29493483466466813, "grad_norm": 6.028786414168179, "learning_rate": 8.269743185374449e-06, "loss": 17.3196, "step": 16135 }, { "epoch": 0.29495311386111467, "grad_norm": 7.446803114877071, "learning_rate": 8.269519235386902e-06, "loss": 18.0951, "step": 16136 }, { "epoch": 0.2949713930575612, "grad_norm": 5.85371185484508, "learning_rate": 8.269295273939974e-06, "loss": 17.2621, "step": 16137 }, { "epoch": 0.29498967225400774, "grad_norm": 6.221291944454979, "learning_rate": 8.26907130103445e-06, "loss": 17.3017, "step": 16138 }, { "epoch": 0.2950079514504542, "grad_norm": 6.240179982039564, "learning_rate": 8.268847316671116e-06, "loss": 17.4279, "step": 16139 }, { "epoch": 0.29502623064690076, "grad_norm": 7.634717676856093, "learning_rate": 8.268623320850755e-06, "loss": 17.6661, "step": 16140 }, { "epoch": 0.2950445098433473, "grad_norm": 7.903036835138847, "learning_rate": 8.268399313574154e-06, "loss": 18.2374, "step": 16141 }, { "epoch": 0.2950627890397938, "grad_norm": 6.865211273666724, "learning_rate": 8.268175294842096e-06, "loss": 17.7922, "step": 16142 }, { "epoch": 0.29508106823624036, "grad_norm": 6.0218696280341355, "learning_rate": 8.267951264655367e-06, "loss": 17.1883, "step": 16143 }, { "epoch": 0.29509934743268684, "grad_norm": 5.3851609690899656, "learning_rate": 8.267727223014752e-06, "loss": 17.3298, "step": 16144 }, { "epoch": 0.2951176266291334, "grad_norm": 7.415122471618471, "learning_rate": 8.267503169921037e-06, "loss": 17.9095, "step": 16145 }, { "epoch": 0.2951359058255799, "grad_norm": 7.143329963730976, "learning_rate": 8.267279105375007e-06, "loss": 17.428, "step": 16146 }, { "epoch": 0.29515418502202645, "grad_norm": 6.871888122379926, "learning_rate": 8.267055029377448e-06, "loss": 17.5288, "step": 16147 }, { "epoch": 0.29517246421847293, "grad_norm": 8.243891775073257, "learning_rate": 8.266830941929144e-06, "loss": 17.8825, "step": 16148 }, { "epoch": 0.29519074341491947, "grad_norm": 6.784484797692967, "learning_rate": 8.26660684303088e-06, "loss": 17.7391, "step": 16149 }, { "epoch": 0.295209022611366, "grad_norm": 9.112605991119066, "learning_rate": 8.266382732683445e-06, "loss": 18.6377, "step": 16150 }, { "epoch": 0.29522730180781254, "grad_norm": 6.986089492669639, "learning_rate": 8.26615861088762e-06, "loss": 17.7572, "step": 16151 }, { "epoch": 0.2952455810042591, "grad_norm": 5.6192299528481175, "learning_rate": 8.265934477644193e-06, "loss": 17.2478, "step": 16152 }, { "epoch": 0.29526386020070555, "grad_norm": 6.16900873742378, "learning_rate": 8.265710332953949e-06, "loss": 17.5938, "step": 16153 }, { "epoch": 0.2952821393971521, "grad_norm": 7.15423716363989, "learning_rate": 8.265486176817675e-06, "loss": 17.6952, "step": 16154 }, { "epoch": 0.2953004185935986, "grad_norm": 5.2767902577615695, "learning_rate": 8.265262009236152e-06, "loss": 17.137, "step": 16155 }, { "epoch": 0.29531869779004516, "grad_norm": 6.146251710364802, "learning_rate": 8.265037830210172e-06, "loss": 17.4041, "step": 16156 }, { "epoch": 0.2953369769864917, "grad_norm": 6.758768374203726, "learning_rate": 8.264813639740517e-06, "loss": 17.8532, "step": 16157 }, { "epoch": 0.2953552561829382, "grad_norm": 6.4511848223802195, "learning_rate": 8.264589437827971e-06, "loss": 17.5946, "step": 16158 }, { "epoch": 0.2953735353793847, "grad_norm": 6.447252327774815, "learning_rate": 8.264365224473327e-06, "loss": 17.6131, "step": 16159 }, { "epoch": 0.29539181457583125, "grad_norm": 6.5147614417505775, "learning_rate": 8.264140999677363e-06, "loss": 17.4324, "step": 16160 }, { "epoch": 0.2954100937722778, "grad_norm": 5.917691681255722, "learning_rate": 8.26391676344087e-06, "loss": 17.3937, "step": 16161 }, { "epoch": 0.2954283729687243, "grad_norm": 5.627604957337583, "learning_rate": 8.26369251576463e-06, "loss": 17.1035, "step": 16162 }, { "epoch": 0.2954466521651708, "grad_norm": 6.813752414980774, "learning_rate": 8.263468256649432e-06, "loss": 17.7017, "step": 16163 }, { "epoch": 0.29546493136161733, "grad_norm": 7.170450657184277, "learning_rate": 8.26324398609606e-06, "loss": 17.8039, "step": 16164 }, { "epoch": 0.29548321055806387, "grad_norm": 6.794922461702556, "learning_rate": 8.263019704105301e-06, "loss": 17.6664, "step": 16165 }, { "epoch": 0.2955014897545104, "grad_norm": 9.545740247540214, "learning_rate": 8.262795410677942e-06, "loss": 17.8033, "step": 16166 }, { "epoch": 0.29551976895095694, "grad_norm": 5.873040185505237, "learning_rate": 8.262571105814768e-06, "loss": 17.3408, "step": 16167 }, { "epoch": 0.2955380481474034, "grad_norm": 7.591817860083055, "learning_rate": 8.262346789516567e-06, "loss": 18.2472, "step": 16168 }, { "epoch": 0.29555632734384996, "grad_norm": 6.756312239314254, "learning_rate": 8.262122461784121e-06, "loss": 17.5218, "step": 16169 }, { "epoch": 0.2955746065402965, "grad_norm": 6.458097026716254, "learning_rate": 8.26189812261822e-06, "loss": 17.6256, "step": 16170 }, { "epoch": 0.295592885736743, "grad_norm": 6.966415218401644, "learning_rate": 8.261673772019649e-06, "loss": 17.4215, "step": 16171 }, { "epoch": 0.29561116493318956, "grad_norm": 6.382851441378164, "learning_rate": 8.261449409989194e-06, "loss": 17.7032, "step": 16172 }, { "epoch": 0.29562944412963604, "grad_norm": 5.805456529681644, "learning_rate": 8.261225036527642e-06, "loss": 17.3363, "step": 16173 }, { "epoch": 0.2956477233260826, "grad_norm": 6.408086235868578, "learning_rate": 8.26100065163578e-06, "loss": 17.4591, "step": 16174 }, { "epoch": 0.2956660025225291, "grad_norm": 5.1312227955501895, "learning_rate": 8.260776255314394e-06, "loss": 17.0203, "step": 16175 }, { "epoch": 0.29568428171897565, "grad_norm": 6.232576056759392, "learning_rate": 8.260551847564268e-06, "loss": 17.4782, "step": 16176 }, { "epoch": 0.2957025609154222, "grad_norm": 6.546301101195819, "learning_rate": 8.260327428386191e-06, "loss": 17.5471, "step": 16177 }, { "epoch": 0.29572084011186867, "grad_norm": 7.393112461830841, "learning_rate": 8.260102997780952e-06, "loss": 18.0688, "step": 16178 }, { "epoch": 0.2957391193083152, "grad_norm": 5.768559251810734, "learning_rate": 8.259878555749332e-06, "loss": 17.2527, "step": 16179 }, { "epoch": 0.29575739850476174, "grad_norm": 6.482737775699103, "learning_rate": 8.259654102292123e-06, "loss": 17.5944, "step": 16180 }, { "epoch": 0.2957756777012083, "grad_norm": 8.191801246187003, "learning_rate": 8.259429637410108e-06, "loss": 17.6921, "step": 16181 }, { "epoch": 0.29579395689765475, "grad_norm": 6.895390327766547, "learning_rate": 8.259205161104075e-06, "loss": 17.6604, "step": 16182 }, { "epoch": 0.2958122360941013, "grad_norm": 6.8447549875082805, "learning_rate": 8.25898067337481e-06, "loss": 17.6201, "step": 16183 }, { "epoch": 0.2958305152905478, "grad_norm": 6.061554104175592, "learning_rate": 8.258756174223101e-06, "loss": 17.4606, "step": 16184 }, { "epoch": 0.29584879448699436, "grad_norm": 10.694557749786986, "learning_rate": 8.258531663649735e-06, "loss": 19.1512, "step": 16185 }, { "epoch": 0.2958670736834409, "grad_norm": 7.995631273483882, "learning_rate": 8.258307141655499e-06, "loss": 18.1532, "step": 16186 }, { "epoch": 0.2958853528798874, "grad_norm": 6.339244400609893, "learning_rate": 8.258082608241177e-06, "loss": 17.1876, "step": 16187 }, { "epoch": 0.2959036320763339, "grad_norm": 7.27091552991875, "learning_rate": 8.25785806340756e-06, "loss": 17.7918, "step": 16188 }, { "epoch": 0.29592191127278045, "grad_norm": 7.415439253092029, "learning_rate": 8.257633507155431e-06, "loss": 17.6777, "step": 16189 }, { "epoch": 0.295940190469227, "grad_norm": 5.553385193729211, "learning_rate": 8.257408939485582e-06, "loss": 17.097, "step": 16190 }, { "epoch": 0.2959584696656735, "grad_norm": 6.635004356880178, "learning_rate": 8.257184360398796e-06, "loss": 17.4957, "step": 16191 }, { "epoch": 0.29597674886212, "grad_norm": 7.517149146968689, "learning_rate": 8.256959769895861e-06, "loss": 17.5058, "step": 16192 }, { "epoch": 0.29599502805856653, "grad_norm": 6.089891699890243, "learning_rate": 8.256735167977566e-06, "loss": 17.3818, "step": 16193 }, { "epoch": 0.29601330725501307, "grad_norm": 5.788348349656729, "learning_rate": 8.256510554644696e-06, "loss": 17.1936, "step": 16194 }, { "epoch": 0.2960315864514596, "grad_norm": 4.823369899702506, "learning_rate": 8.25628592989804e-06, "loss": 16.9966, "step": 16195 }, { "epoch": 0.29604986564790614, "grad_norm": 7.191284231122164, "learning_rate": 8.256061293738382e-06, "loss": 17.8501, "step": 16196 }, { "epoch": 0.2960681448443526, "grad_norm": 7.338598339373476, "learning_rate": 8.255836646166512e-06, "loss": 17.7325, "step": 16197 }, { "epoch": 0.29608642404079916, "grad_norm": 5.882320845772196, "learning_rate": 8.255611987183218e-06, "loss": 17.3934, "step": 16198 }, { "epoch": 0.2961047032372457, "grad_norm": 7.050088584913749, "learning_rate": 8.255387316789289e-06, "loss": 17.8632, "step": 16199 }, { "epoch": 0.29612298243369223, "grad_norm": 7.354316577228631, "learning_rate": 8.255162634985508e-06, "loss": 17.7189, "step": 16200 }, { "epoch": 0.29614126163013876, "grad_norm": 6.596201647834317, "learning_rate": 8.254937941772663e-06, "loss": 17.8398, "step": 16201 }, { "epoch": 0.29615954082658524, "grad_norm": 5.997520771429841, "learning_rate": 8.254713237151546e-06, "loss": 17.3732, "step": 16202 }, { "epoch": 0.2961778200230318, "grad_norm": 5.567684933785494, "learning_rate": 8.254488521122937e-06, "loss": 17.0583, "step": 16203 }, { "epoch": 0.2961960992194783, "grad_norm": 6.988167398003105, "learning_rate": 8.25426379368763e-06, "loss": 17.4395, "step": 16204 }, { "epoch": 0.29621437841592485, "grad_norm": 6.334472459987664, "learning_rate": 8.254039054846413e-06, "loss": 17.3746, "step": 16205 }, { "epoch": 0.2962326576123714, "grad_norm": 6.792457675376119, "learning_rate": 8.25381430460007e-06, "loss": 17.6924, "step": 16206 }, { "epoch": 0.29625093680881787, "grad_norm": 6.543604059497521, "learning_rate": 8.253589542949391e-06, "loss": 17.4972, "step": 16207 }, { "epoch": 0.2962692160052644, "grad_norm": 8.213450685786313, "learning_rate": 8.25336476989516e-06, "loss": 18.0698, "step": 16208 }, { "epoch": 0.29628749520171094, "grad_norm": 6.590426678547136, "learning_rate": 8.253139985438172e-06, "loss": 17.5321, "step": 16209 }, { "epoch": 0.2963057743981575, "grad_norm": 5.826370838699314, "learning_rate": 8.252915189579209e-06, "loss": 17.3427, "step": 16210 }, { "epoch": 0.296324053594604, "grad_norm": 6.142322848068911, "learning_rate": 8.25269038231906e-06, "loss": 17.4707, "step": 16211 }, { "epoch": 0.2963423327910505, "grad_norm": 6.313431713920389, "learning_rate": 8.252465563658514e-06, "loss": 17.3804, "step": 16212 }, { "epoch": 0.296360611987497, "grad_norm": 6.7735844023314815, "learning_rate": 8.252240733598357e-06, "loss": 17.5166, "step": 16213 }, { "epoch": 0.29637889118394356, "grad_norm": 6.634904623775015, "learning_rate": 8.25201589213938e-06, "loss": 17.3804, "step": 16214 }, { "epoch": 0.2963971703803901, "grad_norm": 6.574053095469506, "learning_rate": 8.251791039282369e-06, "loss": 17.6128, "step": 16215 }, { "epoch": 0.2964154495768366, "grad_norm": 6.523212168595939, "learning_rate": 8.251566175028114e-06, "loss": 17.2457, "step": 16216 }, { "epoch": 0.2964337287732831, "grad_norm": 7.227468536478085, "learning_rate": 8.2513412993774e-06, "loss": 17.6916, "step": 16217 }, { "epoch": 0.29645200796972965, "grad_norm": 6.522893347865626, "learning_rate": 8.251116412331017e-06, "loss": 17.553, "step": 16218 }, { "epoch": 0.2964702871661762, "grad_norm": 5.442161858411724, "learning_rate": 8.250891513889754e-06, "loss": 17.1043, "step": 16219 }, { "epoch": 0.2964885663626227, "grad_norm": 6.756243611043344, "learning_rate": 8.250666604054396e-06, "loss": 17.7974, "step": 16220 }, { "epoch": 0.2965068455590692, "grad_norm": 6.723419642373557, "learning_rate": 8.250441682825736e-06, "loss": 17.526, "step": 16221 }, { "epoch": 0.29652512475551573, "grad_norm": 6.723178081527187, "learning_rate": 8.250216750204559e-06, "loss": 17.7818, "step": 16222 }, { "epoch": 0.29654340395196227, "grad_norm": 6.47743698662654, "learning_rate": 8.249991806191656e-06, "loss": 17.6314, "step": 16223 }, { "epoch": 0.2965616831484088, "grad_norm": 6.9977355079827674, "learning_rate": 8.249766850787811e-06, "loss": 17.6568, "step": 16224 }, { "epoch": 0.29657996234485534, "grad_norm": 6.719055048688414, "learning_rate": 8.249541883993816e-06, "loss": 17.4425, "step": 16225 }, { "epoch": 0.2965982415413018, "grad_norm": 7.75375840497314, "learning_rate": 8.24931690581046e-06, "loss": 18.2105, "step": 16226 }, { "epoch": 0.29661652073774836, "grad_norm": 6.969221779509029, "learning_rate": 8.24909191623853e-06, "loss": 17.887, "step": 16227 }, { "epoch": 0.2966347999341949, "grad_norm": 7.026830897623245, "learning_rate": 8.248866915278814e-06, "loss": 17.8746, "step": 16228 }, { "epoch": 0.29665307913064143, "grad_norm": 6.494742477544057, "learning_rate": 8.248641902932102e-06, "loss": 17.6973, "step": 16229 }, { "epoch": 0.29667135832708796, "grad_norm": 7.425141486599022, "learning_rate": 8.248416879199182e-06, "loss": 17.9923, "step": 16230 }, { "epoch": 0.29668963752353444, "grad_norm": 6.136964101879413, "learning_rate": 8.248191844080841e-06, "loss": 17.4555, "step": 16231 }, { "epoch": 0.296707916719981, "grad_norm": 6.159605954960472, "learning_rate": 8.247966797577871e-06, "loss": 17.4765, "step": 16232 }, { "epoch": 0.2967261959164275, "grad_norm": 6.623923583884193, "learning_rate": 8.24774173969106e-06, "loss": 17.7391, "step": 16233 }, { "epoch": 0.29674447511287405, "grad_norm": 6.149862095375403, "learning_rate": 8.247516670421195e-06, "loss": 17.3587, "step": 16234 }, { "epoch": 0.2967627543093206, "grad_norm": 7.031136387036252, "learning_rate": 8.247291589769065e-06, "loss": 17.9162, "step": 16235 }, { "epoch": 0.29678103350576707, "grad_norm": 6.355655195530405, "learning_rate": 8.24706649773546e-06, "loss": 17.638, "step": 16236 }, { "epoch": 0.2967993127022136, "grad_norm": 6.13200706403997, "learning_rate": 8.246841394321172e-06, "loss": 17.454, "step": 16237 }, { "epoch": 0.29681759189866014, "grad_norm": 5.62872690399162, "learning_rate": 8.246616279526982e-06, "loss": 17.1959, "step": 16238 }, { "epoch": 0.2968358710951067, "grad_norm": 7.210139026661183, "learning_rate": 8.246391153353687e-06, "loss": 17.5475, "step": 16239 }, { "epoch": 0.2968541502915532, "grad_norm": 6.75275509676336, "learning_rate": 8.24616601580207e-06, "loss": 17.5892, "step": 16240 }, { "epoch": 0.2968724294879997, "grad_norm": 6.834470207428036, "learning_rate": 8.245940866872925e-06, "loss": 17.3088, "step": 16241 }, { "epoch": 0.2968907086844462, "grad_norm": 7.7373090466346275, "learning_rate": 8.245715706567038e-06, "loss": 18.2506, "step": 16242 }, { "epoch": 0.29690898788089276, "grad_norm": 6.253828221575055, "learning_rate": 8.2454905348852e-06, "loss": 17.335, "step": 16243 }, { "epoch": 0.2969272670773393, "grad_norm": 7.914715304597217, "learning_rate": 8.245265351828197e-06, "loss": 18.1206, "step": 16244 }, { "epoch": 0.29694554627378583, "grad_norm": 6.230357335525374, "learning_rate": 8.245040157396824e-06, "loss": 17.6677, "step": 16245 }, { "epoch": 0.2969638254702323, "grad_norm": 6.535313520280362, "learning_rate": 8.244814951591864e-06, "loss": 17.6605, "step": 16246 }, { "epoch": 0.29698210466667885, "grad_norm": 6.625128264606999, "learning_rate": 8.244589734414112e-06, "loss": 17.7093, "step": 16247 }, { "epoch": 0.2970003838631254, "grad_norm": 7.679745526749129, "learning_rate": 8.244364505864351e-06, "loss": 18.37, "step": 16248 }, { "epoch": 0.2970186630595719, "grad_norm": 7.535364743788393, "learning_rate": 8.244139265943376e-06, "loss": 18.0094, "step": 16249 }, { "epoch": 0.2970369422560184, "grad_norm": 6.396184890742791, "learning_rate": 8.243914014651975e-06, "loss": 17.475, "step": 16250 }, { "epoch": 0.29705522145246493, "grad_norm": 6.868777124854813, "learning_rate": 8.243688751990935e-06, "loss": 17.7841, "step": 16251 }, { "epoch": 0.29707350064891147, "grad_norm": 6.31584488355534, "learning_rate": 8.243463477961048e-06, "loss": 17.452, "step": 16252 }, { "epoch": 0.297091779845358, "grad_norm": 6.467455494339748, "learning_rate": 8.243238192563103e-06, "loss": 17.4342, "step": 16253 }, { "epoch": 0.29711005904180454, "grad_norm": 5.442241482256332, "learning_rate": 8.243012895797891e-06, "loss": 17.1801, "step": 16254 }, { "epoch": 0.297128338238251, "grad_norm": 6.469735857165421, "learning_rate": 8.242787587666198e-06, "loss": 17.731, "step": 16255 }, { "epoch": 0.29714661743469756, "grad_norm": 6.322748020825891, "learning_rate": 8.242562268168817e-06, "loss": 17.2883, "step": 16256 }, { "epoch": 0.2971648966311441, "grad_norm": 6.610727907010459, "learning_rate": 8.242336937306536e-06, "loss": 17.8377, "step": 16257 }, { "epoch": 0.29718317582759063, "grad_norm": 5.088755091306931, "learning_rate": 8.242111595080146e-06, "loss": 17.1507, "step": 16258 }, { "epoch": 0.29720145502403716, "grad_norm": 8.340190513043664, "learning_rate": 8.241886241490438e-06, "loss": 18.0603, "step": 16259 }, { "epoch": 0.29721973422048364, "grad_norm": 6.459625104554761, "learning_rate": 8.241660876538198e-06, "loss": 17.7535, "step": 16260 }, { "epoch": 0.2972380134169302, "grad_norm": 5.587747198410822, "learning_rate": 8.241435500224217e-06, "loss": 17.0845, "step": 16261 }, { "epoch": 0.2972562926133767, "grad_norm": 6.732006582626895, "learning_rate": 8.241210112549287e-06, "loss": 17.7029, "step": 16262 }, { "epoch": 0.29727457180982325, "grad_norm": 7.984138342881745, "learning_rate": 8.240984713514198e-06, "loss": 17.9232, "step": 16263 }, { "epoch": 0.2972928510062698, "grad_norm": 7.675562147358658, "learning_rate": 8.240759303119736e-06, "loss": 18.595, "step": 16264 }, { "epoch": 0.29731113020271627, "grad_norm": 6.360932458947583, "learning_rate": 8.240533881366696e-06, "loss": 17.6675, "step": 16265 }, { "epoch": 0.2973294093991628, "grad_norm": 6.27721314278734, "learning_rate": 8.240308448255866e-06, "loss": 17.6947, "step": 16266 }, { "epoch": 0.29734768859560934, "grad_norm": 6.977908367355837, "learning_rate": 8.240083003788036e-06, "loss": 17.6504, "step": 16267 }, { "epoch": 0.2973659677920559, "grad_norm": 6.049040808097772, "learning_rate": 8.239857547963995e-06, "loss": 17.3966, "step": 16268 }, { "epoch": 0.2973842469885024, "grad_norm": 7.1086045804802795, "learning_rate": 8.239632080784535e-06, "loss": 17.7306, "step": 16269 }, { "epoch": 0.2974025261849489, "grad_norm": 6.054916242819913, "learning_rate": 8.239406602250447e-06, "loss": 17.3749, "step": 16270 }, { "epoch": 0.2974208053813954, "grad_norm": 6.9152581995438895, "learning_rate": 8.239181112362517e-06, "loss": 17.6033, "step": 16271 }, { "epoch": 0.29743908457784196, "grad_norm": 6.459269066450177, "learning_rate": 8.238955611121541e-06, "loss": 17.4815, "step": 16272 }, { "epoch": 0.2974573637742885, "grad_norm": 6.8023051578082026, "learning_rate": 8.238730098528306e-06, "loss": 17.6099, "step": 16273 }, { "epoch": 0.29747564297073503, "grad_norm": 8.092449666154756, "learning_rate": 8.2385045745836e-06, "loss": 17.9357, "step": 16274 }, { "epoch": 0.2974939221671815, "grad_norm": 7.398706690100068, "learning_rate": 8.238279039288222e-06, "loss": 17.9046, "step": 16275 }, { "epoch": 0.29751220136362805, "grad_norm": 5.909733446999433, "learning_rate": 8.238053492642954e-06, "loss": 17.2984, "step": 16276 }, { "epoch": 0.2975304805600746, "grad_norm": 6.661307944514938, "learning_rate": 8.23782793464859e-06, "loss": 17.5077, "step": 16277 }, { "epoch": 0.2975487597565211, "grad_norm": 5.915677565068317, "learning_rate": 8.23760236530592e-06, "loss": 17.0839, "step": 16278 }, { "epoch": 0.29756703895296766, "grad_norm": 6.2602485757742645, "learning_rate": 8.237376784615734e-06, "loss": 17.6571, "step": 16279 }, { "epoch": 0.29758531814941414, "grad_norm": 6.740954056650279, "learning_rate": 8.237151192578823e-06, "loss": 17.648, "step": 16280 }, { "epoch": 0.29760359734586067, "grad_norm": 6.573906646349148, "learning_rate": 8.236925589195978e-06, "loss": 17.4232, "step": 16281 }, { "epoch": 0.2976218765423072, "grad_norm": 7.325810720028958, "learning_rate": 8.236699974467993e-06, "loss": 17.9357, "step": 16282 }, { "epoch": 0.29764015573875374, "grad_norm": 7.240468148216298, "learning_rate": 8.236474348395651e-06, "loss": 17.394, "step": 16283 }, { "epoch": 0.2976584349352002, "grad_norm": 6.918691931463542, "learning_rate": 8.23624871097975e-06, "loss": 17.3358, "step": 16284 }, { "epoch": 0.29767671413164676, "grad_norm": 6.13736298047117, "learning_rate": 8.236023062221077e-06, "loss": 17.4781, "step": 16285 }, { "epoch": 0.2976949933280933, "grad_norm": 7.117920319422106, "learning_rate": 8.235797402120425e-06, "loss": 17.8098, "step": 16286 }, { "epoch": 0.29771327252453983, "grad_norm": 6.822319572920566, "learning_rate": 8.235571730678583e-06, "loss": 17.5788, "step": 16287 }, { "epoch": 0.29773155172098636, "grad_norm": 5.6378570072738, "learning_rate": 8.235346047896342e-06, "loss": 17.246, "step": 16288 }, { "epoch": 0.29774983091743285, "grad_norm": 6.837267184240421, "learning_rate": 8.235120353774494e-06, "loss": 17.9497, "step": 16289 }, { "epoch": 0.2977681101138794, "grad_norm": 6.56729590587665, "learning_rate": 8.234894648313832e-06, "loss": 17.8588, "step": 16290 }, { "epoch": 0.2977863893103259, "grad_norm": 7.694086139703035, "learning_rate": 8.234668931515143e-06, "loss": 18.0188, "step": 16291 }, { "epoch": 0.29780466850677245, "grad_norm": 7.2056602639730345, "learning_rate": 8.23444320337922e-06, "loss": 17.957, "step": 16292 }, { "epoch": 0.297822947703219, "grad_norm": 6.620513928103384, "learning_rate": 8.234217463906857e-06, "loss": 17.4882, "step": 16293 }, { "epoch": 0.29784122689966547, "grad_norm": 6.255504383389368, "learning_rate": 8.233991713098839e-06, "loss": 17.3204, "step": 16294 }, { "epoch": 0.297859506096112, "grad_norm": 5.402672496530524, "learning_rate": 8.233765950955963e-06, "loss": 17.1263, "step": 16295 }, { "epoch": 0.29787778529255854, "grad_norm": 7.434840654253358, "learning_rate": 8.233540177479016e-06, "loss": 17.7537, "step": 16296 }, { "epoch": 0.2978960644890051, "grad_norm": 6.053828911386629, "learning_rate": 8.233314392668794e-06, "loss": 17.3347, "step": 16297 }, { "epoch": 0.2979143436854516, "grad_norm": 8.279375604491499, "learning_rate": 8.233088596526082e-06, "loss": 18.3454, "step": 16298 }, { "epoch": 0.2979326228818981, "grad_norm": 7.516060747930949, "learning_rate": 8.232862789051678e-06, "loss": 18.3192, "step": 16299 }, { "epoch": 0.2979509020783446, "grad_norm": 5.65837228236491, "learning_rate": 8.232636970246371e-06, "loss": 17.2985, "step": 16300 }, { "epoch": 0.29796918127479116, "grad_norm": 7.764916684762759, "learning_rate": 8.232411140110948e-06, "loss": 18.1349, "step": 16301 }, { "epoch": 0.2979874604712377, "grad_norm": 6.2881691007591485, "learning_rate": 8.232185298646209e-06, "loss": 17.316, "step": 16302 }, { "epoch": 0.29800573966768423, "grad_norm": 5.6482328866201525, "learning_rate": 8.231959445852937e-06, "loss": 17.1771, "step": 16303 }, { "epoch": 0.2980240188641307, "grad_norm": 5.602594649059896, "learning_rate": 8.23173358173193e-06, "loss": 17.1372, "step": 16304 }, { "epoch": 0.29804229806057725, "grad_norm": 7.803580560883558, "learning_rate": 8.231507706283976e-06, "loss": 17.8065, "step": 16305 }, { "epoch": 0.2980605772570238, "grad_norm": 7.578179986971631, "learning_rate": 8.231281819509869e-06, "loss": 18.0388, "step": 16306 }, { "epoch": 0.2980788564534703, "grad_norm": 6.970383338331267, "learning_rate": 8.2310559214104e-06, "loss": 17.875, "step": 16307 }, { "epoch": 0.29809713564991686, "grad_norm": 5.25820880547372, "learning_rate": 8.23083001198636e-06, "loss": 17.0394, "step": 16308 }, { "epoch": 0.29811541484636334, "grad_norm": 5.943940907953198, "learning_rate": 8.230604091238542e-06, "loss": 17.5735, "step": 16309 }, { "epoch": 0.29813369404280987, "grad_norm": 6.077625711083019, "learning_rate": 8.230378159167733e-06, "loss": 17.4468, "step": 16310 }, { "epoch": 0.2981519732392564, "grad_norm": 6.435290454954497, "learning_rate": 8.230152215774731e-06, "loss": 17.3735, "step": 16311 }, { "epoch": 0.29817025243570294, "grad_norm": 5.978024970966035, "learning_rate": 8.229926261060328e-06, "loss": 17.0599, "step": 16312 }, { "epoch": 0.2981885316321495, "grad_norm": 7.255333358452495, "learning_rate": 8.229700295025311e-06, "loss": 18.2076, "step": 16313 }, { "epoch": 0.29820681082859596, "grad_norm": 5.767137796494151, "learning_rate": 8.229474317670476e-06, "loss": 17.2848, "step": 16314 }, { "epoch": 0.2982250900250425, "grad_norm": 5.74214433296508, "learning_rate": 8.229248328996615e-06, "loss": 17.396, "step": 16315 }, { "epoch": 0.29824336922148903, "grad_norm": 7.15303670243752, "learning_rate": 8.229022329004518e-06, "loss": 17.8744, "step": 16316 }, { "epoch": 0.29826164841793557, "grad_norm": 6.630547596158621, "learning_rate": 8.228796317694976e-06, "loss": 17.6934, "step": 16317 }, { "epoch": 0.29827992761438205, "grad_norm": 6.012624207429132, "learning_rate": 8.228570295068785e-06, "loss": 17.3032, "step": 16318 }, { "epoch": 0.2982982068108286, "grad_norm": 7.6240797394970325, "learning_rate": 8.228344261126735e-06, "loss": 18.0754, "step": 16319 }, { "epoch": 0.2983164860072751, "grad_norm": 7.414234900991309, "learning_rate": 8.228118215869619e-06, "loss": 18.1664, "step": 16320 }, { "epoch": 0.29833476520372165, "grad_norm": 7.557201962615848, "learning_rate": 8.227892159298228e-06, "loss": 17.6655, "step": 16321 }, { "epoch": 0.2983530444001682, "grad_norm": 6.3213765342509, "learning_rate": 8.227666091413354e-06, "loss": 17.3342, "step": 16322 }, { "epoch": 0.29837132359661467, "grad_norm": 7.023726114079908, "learning_rate": 8.22744001221579e-06, "loss": 17.743, "step": 16323 }, { "epoch": 0.2983896027930612, "grad_norm": 6.289313939971966, "learning_rate": 8.227213921706332e-06, "loss": 17.5194, "step": 16324 }, { "epoch": 0.29840788198950774, "grad_norm": 5.8862396089787845, "learning_rate": 8.226987819885767e-06, "loss": 17.2599, "step": 16325 }, { "epoch": 0.2984261611859543, "grad_norm": 7.5430086296374474, "learning_rate": 8.226761706754891e-06, "loss": 18.1224, "step": 16326 }, { "epoch": 0.2984444403824008, "grad_norm": 8.393691036336323, "learning_rate": 8.226535582314494e-06, "loss": 18.579, "step": 16327 }, { "epoch": 0.2984627195788473, "grad_norm": 5.713785414661842, "learning_rate": 8.226309446565371e-06, "loss": 17.2607, "step": 16328 }, { "epoch": 0.2984809987752938, "grad_norm": 6.678237594393372, "learning_rate": 8.226083299508312e-06, "loss": 17.6117, "step": 16329 }, { "epoch": 0.29849927797174036, "grad_norm": 7.120928405474422, "learning_rate": 8.225857141144111e-06, "loss": 17.8109, "step": 16330 }, { "epoch": 0.2985175571681869, "grad_norm": 6.400104494718224, "learning_rate": 8.225630971473561e-06, "loss": 17.559, "step": 16331 }, { "epoch": 0.29853583636463343, "grad_norm": 6.076709500381016, "learning_rate": 8.225404790497456e-06, "loss": 17.2295, "step": 16332 }, { "epoch": 0.2985541155610799, "grad_norm": 6.556860148667879, "learning_rate": 8.225178598216586e-06, "loss": 17.7958, "step": 16333 }, { "epoch": 0.29857239475752645, "grad_norm": 6.5912662165209746, "learning_rate": 8.224952394631744e-06, "loss": 17.3398, "step": 16334 }, { "epoch": 0.298590673953973, "grad_norm": 7.461391451472866, "learning_rate": 8.224726179743726e-06, "loss": 17.8425, "step": 16335 }, { "epoch": 0.2986089531504195, "grad_norm": 6.171684837161076, "learning_rate": 8.22449995355332e-06, "loss": 17.5799, "step": 16336 }, { "epoch": 0.29862723234686606, "grad_norm": 7.531757010209729, "learning_rate": 8.224273716061321e-06, "loss": 17.9508, "step": 16337 }, { "epoch": 0.29864551154331254, "grad_norm": 6.823430736037084, "learning_rate": 8.224047467268524e-06, "loss": 17.7491, "step": 16338 }, { "epoch": 0.29866379073975907, "grad_norm": 5.504777292052472, "learning_rate": 8.22382120717572e-06, "loss": 17.0871, "step": 16339 }, { "epoch": 0.2986820699362056, "grad_norm": 6.439319508459507, "learning_rate": 8.223594935783703e-06, "loss": 17.5903, "step": 16340 }, { "epoch": 0.29870034913265214, "grad_norm": 7.342933055695366, "learning_rate": 8.223368653093267e-06, "loss": 17.9005, "step": 16341 }, { "epoch": 0.2987186283290987, "grad_norm": 6.640712689129402, "learning_rate": 8.2231423591052e-06, "loss": 17.3471, "step": 16342 }, { "epoch": 0.29873690752554516, "grad_norm": 5.821954409355846, "learning_rate": 8.2229160538203e-06, "loss": 17.3971, "step": 16343 }, { "epoch": 0.2987551867219917, "grad_norm": 5.710282016845329, "learning_rate": 8.22268973723936e-06, "loss": 17.2268, "step": 16344 }, { "epoch": 0.29877346591843823, "grad_norm": 5.344218447819602, "learning_rate": 8.222463409363171e-06, "loss": 17.2301, "step": 16345 }, { "epoch": 0.29879174511488477, "grad_norm": 6.029696883942533, "learning_rate": 8.222237070192528e-06, "loss": 17.2053, "step": 16346 }, { "epoch": 0.2988100243113313, "grad_norm": 10.209855700168081, "learning_rate": 8.222010719728225e-06, "loss": 18.889, "step": 16347 }, { "epoch": 0.2988283035077778, "grad_norm": 6.3143922895963716, "learning_rate": 8.221784357971053e-06, "loss": 17.282, "step": 16348 }, { "epoch": 0.2988465827042243, "grad_norm": 6.721410765336859, "learning_rate": 8.221557984921803e-06, "loss": 17.7417, "step": 16349 }, { "epoch": 0.29886486190067085, "grad_norm": 6.411746297474031, "learning_rate": 8.221331600581276e-06, "loss": 17.4306, "step": 16350 }, { "epoch": 0.2988831410971174, "grad_norm": 6.359058951813595, "learning_rate": 8.221105204950259e-06, "loss": 17.7775, "step": 16351 }, { "epoch": 0.29890142029356387, "grad_norm": 9.464212744525556, "learning_rate": 8.22087879802955e-06, "loss": 18.5987, "step": 16352 }, { "epoch": 0.2989196994900104, "grad_norm": 7.341539248348361, "learning_rate": 8.220652379819939e-06, "loss": 18.0818, "step": 16353 }, { "epoch": 0.29893797868645694, "grad_norm": 5.992769917037668, "learning_rate": 8.220425950322222e-06, "loss": 17.4905, "step": 16354 }, { "epoch": 0.2989562578829035, "grad_norm": 6.541544820443978, "learning_rate": 8.22019950953719e-06, "loss": 17.6631, "step": 16355 }, { "epoch": 0.29897453707935, "grad_norm": 6.882540981182489, "learning_rate": 8.219973057465638e-06, "loss": 17.9089, "step": 16356 }, { "epoch": 0.2989928162757965, "grad_norm": 6.2809731953307955, "learning_rate": 8.21974659410836e-06, "loss": 17.573, "step": 16357 }, { "epoch": 0.299011095472243, "grad_norm": 6.748124825600281, "learning_rate": 8.219520119466152e-06, "loss": 17.7794, "step": 16358 }, { "epoch": 0.29902937466868956, "grad_norm": 6.647893390589946, "learning_rate": 8.219293633539803e-06, "loss": 17.3586, "step": 16359 }, { "epoch": 0.2990476538651361, "grad_norm": 6.014042612491038, "learning_rate": 8.219067136330107e-06, "loss": 17.3431, "step": 16360 }, { "epoch": 0.29906593306158263, "grad_norm": 5.92188614220963, "learning_rate": 8.218840627837866e-06, "loss": 17.2457, "step": 16361 }, { "epoch": 0.2990842122580291, "grad_norm": 7.5322215277244355, "learning_rate": 8.218614108063863e-06, "loss": 17.8183, "step": 16362 }, { "epoch": 0.29910249145447565, "grad_norm": 6.318718321398887, "learning_rate": 8.218387577008896e-06, "loss": 17.3413, "step": 16363 }, { "epoch": 0.2991207706509222, "grad_norm": 8.63721541871255, "learning_rate": 8.218161034673763e-06, "loss": 18.7411, "step": 16364 }, { "epoch": 0.2991390498473687, "grad_norm": 5.896134279393408, "learning_rate": 8.217934481059255e-06, "loss": 17.4976, "step": 16365 }, { "epoch": 0.29915732904381526, "grad_norm": 6.614586099824656, "learning_rate": 8.217707916166165e-06, "loss": 17.6771, "step": 16366 }, { "epoch": 0.29917560824026174, "grad_norm": 7.594683145901173, "learning_rate": 8.217481339995288e-06, "loss": 17.923, "step": 16367 }, { "epoch": 0.2991938874367083, "grad_norm": 7.1342326422791515, "learning_rate": 8.217254752547419e-06, "loss": 17.8564, "step": 16368 }, { "epoch": 0.2992121666331548, "grad_norm": 8.750236790492691, "learning_rate": 8.21702815382335e-06, "loss": 18.7921, "step": 16369 }, { "epoch": 0.29923044582960134, "grad_norm": 7.5708330389398855, "learning_rate": 8.216801543823875e-06, "loss": 18.0739, "step": 16370 }, { "epoch": 0.2992487250260479, "grad_norm": 6.925853287739947, "learning_rate": 8.216574922549794e-06, "loss": 17.4019, "step": 16371 }, { "epoch": 0.29926700422249436, "grad_norm": 8.817725573612206, "learning_rate": 8.216348290001893e-06, "loss": 18.5605, "step": 16372 }, { "epoch": 0.2992852834189409, "grad_norm": 6.942994691492411, "learning_rate": 8.216121646180973e-06, "loss": 17.6855, "step": 16373 }, { "epoch": 0.29930356261538743, "grad_norm": 5.870285652412425, "learning_rate": 8.215894991087823e-06, "loss": 17.1564, "step": 16374 }, { "epoch": 0.29932184181183397, "grad_norm": 7.281625574319899, "learning_rate": 8.215668324723242e-06, "loss": 18.0881, "step": 16375 }, { "epoch": 0.2993401210082805, "grad_norm": 6.719466972932332, "learning_rate": 8.215441647088023e-06, "loss": 17.588, "step": 16376 }, { "epoch": 0.299358400204727, "grad_norm": 7.8788804222029265, "learning_rate": 8.215214958182959e-06, "loss": 17.8596, "step": 16377 }, { "epoch": 0.2993766794011735, "grad_norm": 6.53591483455379, "learning_rate": 8.214988258008845e-06, "loss": 17.5788, "step": 16378 }, { "epoch": 0.29939495859762005, "grad_norm": 6.936515196876394, "learning_rate": 8.214761546566478e-06, "loss": 17.7465, "step": 16379 }, { "epoch": 0.2994132377940666, "grad_norm": 8.580979463421398, "learning_rate": 8.21453482385665e-06, "loss": 18.464, "step": 16380 }, { "epoch": 0.2994315169905131, "grad_norm": 6.168998388455126, "learning_rate": 8.214308089880156e-06, "loss": 17.3842, "step": 16381 }, { "epoch": 0.2994497961869596, "grad_norm": 7.31217079621152, "learning_rate": 8.214081344637792e-06, "loss": 17.7132, "step": 16382 }, { "epoch": 0.29946807538340614, "grad_norm": 9.892344239339227, "learning_rate": 8.21385458813035e-06, "loss": 17.5744, "step": 16383 }, { "epoch": 0.2994863545798527, "grad_norm": 6.62911889642519, "learning_rate": 8.213627820358627e-06, "loss": 17.6832, "step": 16384 }, { "epoch": 0.2995046337762992, "grad_norm": 6.143767905709122, "learning_rate": 8.213401041323418e-06, "loss": 17.6862, "step": 16385 }, { "epoch": 0.2995229129727457, "grad_norm": 7.191470006510474, "learning_rate": 8.213174251025517e-06, "loss": 17.8029, "step": 16386 }, { "epoch": 0.2995411921691922, "grad_norm": 5.654302385320921, "learning_rate": 8.21294744946572e-06, "loss": 17.2512, "step": 16387 }, { "epoch": 0.29955947136563876, "grad_norm": 6.370351161826269, "learning_rate": 8.21272063664482e-06, "loss": 17.2756, "step": 16388 }, { "epoch": 0.2995777505620853, "grad_norm": 6.32073824523956, "learning_rate": 8.212493812563613e-06, "loss": 17.5443, "step": 16389 }, { "epoch": 0.29959602975853183, "grad_norm": 6.537775541813034, "learning_rate": 8.212266977222893e-06, "loss": 17.6793, "step": 16390 }, { "epoch": 0.2996143089549783, "grad_norm": 8.518777316757442, "learning_rate": 8.212040130623458e-06, "loss": 18.5988, "step": 16391 }, { "epoch": 0.29963258815142485, "grad_norm": 6.986553902632954, "learning_rate": 8.2118132727661e-06, "loss": 17.9079, "step": 16392 }, { "epoch": 0.2996508673478714, "grad_norm": 7.459135995463893, "learning_rate": 8.211586403651616e-06, "loss": 18.0181, "step": 16393 }, { "epoch": 0.2996691465443179, "grad_norm": 6.109765355923692, "learning_rate": 8.2113595232808e-06, "loss": 17.2701, "step": 16394 }, { "epoch": 0.29968742574076446, "grad_norm": 7.112389133833306, "learning_rate": 8.211132631654447e-06, "loss": 17.8923, "step": 16395 }, { "epoch": 0.29970570493721094, "grad_norm": 5.862526486348486, "learning_rate": 8.210905728773353e-06, "loss": 17.3055, "step": 16396 }, { "epoch": 0.2997239841336575, "grad_norm": 6.354541865061286, "learning_rate": 8.210678814638313e-06, "loss": 17.4199, "step": 16397 }, { "epoch": 0.299742263330104, "grad_norm": 6.5336974846492035, "learning_rate": 8.210451889250121e-06, "loss": 17.5184, "step": 16398 }, { "epoch": 0.29976054252655054, "grad_norm": 8.431012648038225, "learning_rate": 8.210224952609575e-06, "loss": 17.8148, "step": 16399 }, { "epoch": 0.2997788217229971, "grad_norm": 7.255367776565569, "learning_rate": 8.20999800471747e-06, "loss": 17.6895, "step": 16400 }, { "epoch": 0.29979710091944356, "grad_norm": 5.651987035069994, "learning_rate": 8.209771045574599e-06, "loss": 17.1961, "step": 16401 }, { "epoch": 0.2998153801158901, "grad_norm": 6.4202266438013655, "learning_rate": 8.20954407518176e-06, "loss": 17.5035, "step": 16402 }, { "epoch": 0.29983365931233663, "grad_norm": 7.965755841062741, "learning_rate": 8.209317093539748e-06, "loss": 18.2732, "step": 16403 }, { "epoch": 0.29985193850878317, "grad_norm": 7.526054502795034, "learning_rate": 8.209090100649357e-06, "loss": 17.8266, "step": 16404 }, { "epoch": 0.2998702177052297, "grad_norm": 7.239781023690425, "learning_rate": 8.208863096511385e-06, "loss": 17.6852, "step": 16405 }, { "epoch": 0.2998884969016762, "grad_norm": 7.012467979271183, "learning_rate": 8.208636081126625e-06, "loss": 17.8401, "step": 16406 }, { "epoch": 0.2999067760981227, "grad_norm": 6.49402094062138, "learning_rate": 8.208409054495874e-06, "loss": 17.4207, "step": 16407 }, { "epoch": 0.29992505529456925, "grad_norm": 6.415587472946477, "learning_rate": 8.208182016619928e-06, "loss": 17.5619, "step": 16408 }, { "epoch": 0.2999433344910158, "grad_norm": 5.439534581469668, "learning_rate": 8.207954967499583e-06, "loss": 17.0143, "step": 16409 }, { "epoch": 0.2999616136874623, "grad_norm": 6.0970909843815475, "learning_rate": 8.207727907135634e-06, "loss": 17.4049, "step": 16410 }, { "epoch": 0.2999798928839088, "grad_norm": 6.199160967168086, "learning_rate": 8.207500835528877e-06, "loss": 17.5505, "step": 16411 }, { "epoch": 0.29999817208035534, "grad_norm": 6.3804577425469065, "learning_rate": 8.207273752680107e-06, "loss": 17.4565, "step": 16412 }, { "epoch": 0.3000164512768019, "grad_norm": 6.856210443440484, "learning_rate": 8.207046658590121e-06, "loss": 17.643, "step": 16413 }, { "epoch": 0.3000347304732484, "grad_norm": 7.289911227589726, "learning_rate": 8.206819553259716e-06, "loss": 17.8324, "step": 16414 }, { "epoch": 0.30005300966969495, "grad_norm": 6.578725571598782, "learning_rate": 8.206592436689686e-06, "loss": 17.4686, "step": 16415 }, { "epoch": 0.30007128886614143, "grad_norm": 7.295758397619474, "learning_rate": 8.206365308880828e-06, "loss": 17.6599, "step": 16416 }, { "epoch": 0.30008956806258796, "grad_norm": 7.035031104762345, "learning_rate": 8.206138169833938e-06, "loss": 17.7502, "step": 16417 }, { "epoch": 0.3001078472590345, "grad_norm": 7.635415994651476, "learning_rate": 8.20591101954981e-06, "loss": 17.8712, "step": 16418 }, { "epoch": 0.30012612645548103, "grad_norm": 6.0513149454420505, "learning_rate": 8.205683858029244e-06, "loss": 17.5575, "step": 16419 }, { "epoch": 0.3001444056519275, "grad_norm": 7.690193874636865, "learning_rate": 8.205456685273035e-06, "loss": 18.0409, "step": 16420 }, { "epoch": 0.30016268484837405, "grad_norm": 6.337069261755316, "learning_rate": 8.205229501281976e-06, "loss": 17.4, "step": 16421 }, { "epoch": 0.3001809640448206, "grad_norm": 7.08058405623771, "learning_rate": 8.205002306056865e-06, "loss": 17.9945, "step": 16422 }, { "epoch": 0.3001992432412671, "grad_norm": 5.790310461273087, "learning_rate": 8.204775099598503e-06, "loss": 17.2035, "step": 16423 }, { "epoch": 0.30021752243771366, "grad_norm": 6.460634673234715, "learning_rate": 8.20454788190768e-06, "loss": 17.6697, "step": 16424 }, { "epoch": 0.30023580163416014, "grad_norm": 7.638564574355802, "learning_rate": 8.204320652985195e-06, "loss": 17.8488, "step": 16425 }, { "epoch": 0.3002540808306067, "grad_norm": 6.639336955554244, "learning_rate": 8.204093412831845e-06, "loss": 17.6881, "step": 16426 }, { "epoch": 0.3002723600270532, "grad_norm": 6.42767590274851, "learning_rate": 8.203866161448425e-06, "loss": 17.4481, "step": 16427 }, { "epoch": 0.30029063922349974, "grad_norm": 7.207889403731094, "learning_rate": 8.20363889883573e-06, "loss": 18.0579, "step": 16428 }, { "epoch": 0.3003089184199463, "grad_norm": 6.107333428761575, "learning_rate": 8.203411624994561e-06, "loss": 17.398, "step": 16429 }, { "epoch": 0.30032719761639276, "grad_norm": 5.7498502185620115, "learning_rate": 8.203184339925714e-06, "loss": 17.209, "step": 16430 }, { "epoch": 0.3003454768128393, "grad_norm": 6.980368445476265, "learning_rate": 8.202957043629981e-06, "loss": 17.6543, "step": 16431 }, { "epoch": 0.30036375600928583, "grad_norm": 6.612427473663922, "learning_rate": 8.202729736108163e-06, "loss": 17.503, "step": 16432 }, { "epoch": 0.30038203520573237, "grad_norm": 6.970997891185264, "learning_rate": 8.202502417361053e-06, "loss": 17.7627, "step": 16433 }, { "epoch": 0.3004003144021789, "grad_norm": 6.73318970432507, "learning_rate": 8.202275087389452e-06, "loss": 17.5573, "step": 16434 }, { "epoch": 0.3004185935986254, "grad_norm": 7.305255101930601, "learning_rate": 8.202047746194155e-06, "loss": 17.8708, "step": 16435 }, { "epoch": 0.3004368727950719, "grad_norm": 6.06333359743604, "learning_rate": 8.201820393775957e-06, "loss": 17.3008, "step": 16436 }, { "epoch": 0.30045515199151845, "grad_norm": 8.089268148461798, "learning_rate": 8.201593030135657e-06, "loss": 18.2991, "step": 16437 }, { "epoch": 0.300473431187965, "grad_norm": 5.927432331427687, "learning_rate": 8.201365655274051e-06, "loss": 17.5833, "step": 16438 }, { "epoch": 0.3004917103844115, "grad_norm": 5.144567413931674, "learning_rate": 8.201138269191937e-06, "loss": 17.0671, "step": 16439 }, { "epoch": 0.300509989580858, "grad_norm": 5.5994736216512315, "learning_rate": 8.20091087189011e-06, "loss": 17.1244, "step": 16440 }, { "epoch": 0.30052826877730454, "grad_norm": 7.242150273910535, "learning_rate": 8.20068346336937e-06, "loss": 17.9618, "step": 16441 }, { "epoch": 0.3005465479737511, "grad_norm": 6.085807824257408, "learning_rate": 8.20045604363051e-06, "loss": 17.5531, "step": 16442 }, { "epoch": 0.3005648271701976, "grad_norm": 5.901827548152579, "learning_rate": 8.20022861267433e-06, "loss": 17.2967, "step": 16443 }, { "epoch": 0.30058310636664415, "grad_norm": 6.5713707264465375, "learning_rate": 8.200001170501627e-06, "loss": 17.168, "step": 16444 }, { "epoch": 0.30060138556309063, "grad_norm": 6.241234340365578, "learning_rate": 8.199773717113198e-06, "loss": 17.3489, "step": 16445 }, { "epoch": 0.30061966475953716, "grad_norm": 7.99825696017237, "learning_rate": 8.199546252509838e-06, "loss": 18.1971, "step": 16446 }, { "epoch": 0.3006379439559837, "grad_norm": 5.952035644458607, "learning_rate": 8.199318776692347e-06, "loss": 17.4981, "step": 16447 }, { "epoch": 0.30065622315243024, "grad_norm": 6.8341513635009425, "learning_rate": 8.199091289661522e-06, "loss": 17.498, "step": 16448 }, { "epoch": 0.30067450234887677, "grad_norm": 6.460937181230472, "learning_rate": 8.198863791418159e-06, "loss": 17.712, "step": 16449 }, { "epoch": 0.30069278154532325, "grad_norm": 7.310936784672614, "learning_rate": 8.198636281963055e-06, "loss": 17.8312, "step": 16450 }, { "epoch": 0.3007110607417698, "grad_norm": 5.779597444089757, "learning_rate": 8.19840876129701e-06, "loss": 17.1703, "step": 16451 }, { "epoch": 0.3007293399382163, "grad_norm": 7.292474928284607, "learning_rate": 8.198181229420819e-06, "loss": 17.6399, "step": 16452 }, { "epoch": 0.30074761913466286, "grad_norm": 5.460471257386947, "learning_rate": 8.197953686335281e-06, "loss": 17.1148, "step": 16453 }, { "epoch": 0.30076589833110934, "grad_norm": 7.682451591408236, "learning_rate": 8.197726132041194e-06, "loss": 18.075, "step": 16454 }, { "epoch": 0.3007841775275559, "grad_norm": 7.517908304719064, "learning_rate": 8.19749856653935e-06, "loss": 18.0855, "step": 16455 }, { "epoch": 0.3008024567240024, "grad_norm": 7.706676260127519, "learning_rate": 8.197270989830554e-06, "loss": 17.9831, "step": 16456 }, { "epoch": 0.30082073592044895, "grad_norm": 8.199037568674859, "learning_rate": 8.197043401915601e-06, "loss": 17.9961, "step": 16457 }, { "epoch": 0.3008390151168955, "grad_norm": 6.534789165792984, "learning_rate": 8.196815802795288e-06, "loss": 17.4348, "step": 16458 }, { "epoch": 0.30085729431334196, "grad_norm": 6.904812089585506, "learning_rate": 8.196588192470412e-06, "loss": 17.5515, "step": 16459 }, { "epoch": 0.3008755735097885, "grad_norm": 5.8144758827684795, "learning_rate": 8.196360570941773e-06, "loss": 17.4175, "step": 16460 }, { "epoch": 0.30089385270623503, "grad_norm": 13.19151471823941, "learning_rate": 8.196132938210166e-06, "loss": 18.2953, "step": 16461 }, { "epoch": 0.30091213190268157, "grad_norm": 7.153700518477827, "learning_rate": 8.195905294276392e-06, "loss": 17.8263, "step": 16462 }, { "epoch": 0.3009304110991281, "grad_norm": 6.088575872454362, "learning_rate": 8.195677639141247e-06, "loss": 17.5734, "step": 16463 }, { "epoch": 0.3009486902955746, "grad_norm": 7.01020178943264, "learning_rate": 8.195449972805529e-06, "loss": 17.3798, "step": 16464 }, { "epoch": 0.3009669694920211, "grad_norm": 6.772620443157715, "learning_rate": 8.195222295270035e-06, "loss": 17.3864, "step": 16465 }, { "epoch": 0.30098524868846765, "grad_norm": 7.187676691443884, "learning_rate": 8.194994606535566e-06, "loss": 17.8388, "step": 16466 }, { "epoch": 0.3010035278849142, "grad_norm": 6.564834304727205, "learning_rate": 8.194766906602916e-06, "loss": 17.4207, "step": 16467 }, { "epoch": 0.3010218070813607, "grad_norm": 7.6128025116026885, "learning_rate": 8.194539195472888e-06, "loss": 18.0054, "step": 16468 }, { "epoch": 0.3010400862778072, "grad_norm": 7.329839875497806, "learning_rate": 8.194311473146274e-06, "loss": 17.9493, "step": 16469 }, { "epoch": 0.30105836547425374, "grad_norm": 6.162042508545888, "learning_rate": 8.19408373962388e-06, "loss": 17.4555, "step": 16470 }, { "epoch": 0.3010766446707003, "grad_norm": 6.166557836595641, "learning_rate": 8.193855994906497e-06, "loss": 17.5689, "step": 16471 }, { "epoch": 0.3010949238671468, "grad_norm": 6.038105628137205, "learning_rate": 8.193628238994924e-06, "loss": 17.223, "step": 16472 }, { "epoch": 0.30111320306359335, "grad_norm": 5.687321592545057, "learning_rate": 8.193400471889965e-06, "loss": 17.3932, "step": 16473 }, { "epoch": 0.30113148226003983, "grad_norm": 6.6969656967844085, "learning_rate": 8.19317269359241e-06, "loss": 17.6497, "step": 16474 }, { "epoch": 0.30114976145648636, "grad_norm": 8.598318612771129, "learning_rate": 8.192944904103065e-06, "loss": 18.3712, "step": 16475 }, { "epoch": 0.3011680406529329, "grad_norm": 6.489485945335258, "learning_rate": 8.192717103422725e-06, "loss": 17.5235, "step": 16476 }, { "epoch": 0.30118631984937944, "grad_norm": 7.399255963939922, "learning_rate": 8.192489291552188e-06, "loss": 17.6362, "step": 16477 }, { "epoch": 0.30120459904582597, "grad_norm": 12.276893317189007, "learning_rate": 8.192261468492252e-06, "loss": 18.7239, "step": 16478 }, { "epoch": 0.30122287824227245, "grad_norm": 6.283561086603921, "learning_rate": 8.19203363424372e-06, "loss": 17.6668, "step": 16479 }, { "epoch": 0.301241157438719, "grad_norm": 8.687683138104715, "learning_rate": 8.191805788807383e-06, "loss": 18.1202, "step": 16480 }, { "epoch": 0.3012594366351655, "grad_norm": 6.722267533928825, "learning_rate": 8.191577932184045e-06, "loss": 17.5651, "step": 16481 }, { "epoch": 0.30127771583161206, "grad_norm": 7.543462823867828, "learning_rate": 8.191350064374505e-06, "loss": 17.8648, "step": 16482 }, { "epoch": 0.3012959950280586, "grad_norm": 7.695502028635188, "learning_rate": 8.19112218537956e-06, "loss": 17.7225, "step": 16483 }, { "epoch": 0.3013142742245051, "grad_norm": 7.016309067710564, "learning_rate": 8.190894295200006e-06, "loss": 17.6521, "step": 16484 }, { "epoch": 0.3013325534209516, "grad_norm": 5.179047886656715, "learning_rate": 8.190666393836646e-06, "loss": 16.982, "step": 16485 }, { "epoch": 0.30135083261739815, "grad_norm": 7.388548267853332, "learning_rate": 8.190438481290278e-06, "loss": 17.9728, "step": 16486 }, { "epoch": 0.3013691118138447, "grad_norm": 5.749814460472834, "learning_rate": 8.190210557561698e-06, "loss": 17.4074, "step": 16487 }, { "epoch": 0.30138739101029116, "grad_norm": 7.430658950912746, "learning_rate": 8.189982622651707e-06, "loss": 17.75, "step": 16488 }, { "epoch": 0.3014056702067377, "grad_norm": 8.619400391358544, "learning_rate": 8.189754676561105e-06, "loss": 17.8165, "step": 16489 }, { "epoch": 0.30142394940318423, "grad_norm": 7.090906533322632, "learning_rate": 8.189526719290688e-06, "loss": 17.644, "step": 16490 }, { "epoch": 0.30144222859963077, "grad_norm": 6.997694260141088, "learning_rate": 8.18929875084126e-06, "loss": 17.5898, "step": 16491 }, { "epoch": 0.3014605077960773, "grad_norm": 6.926362107786558, "learning_rate": 8.189070771213614e-06, "loss": 18.1243, "step": 16492 }, { "epoch": 0.3014787869925238, "grad_norm": 7.318321542607569, "learning_rate": 8.188842780408551e-06, "loss": 17.595, "step": 16493 }, { "epoch": 0.3014970661889703, "grad_norm": 6.952968749692479, "learning_rate": 8.188614778426871e-06, "loss": 17.9359, "step": 16494 }, { "epoch": 0.30151534538541686, "grad_norm": 6.615195948124856, "learning_rate": 8.188386765269376e-06, "loss": 17.546, "step": 16495 }, { "epoch": 0.3015336245818634, "grad_norm": 8.695163492420042, "learning_rate": 8.188158740936859e-06, "loss": 18.9347, "step": 16496 }, { "epoch": 0.3015519037783099, "grad_norm": 5.85640219611583, "learning_rate": 8.187930705430123e-06, "loss": 17.3828, "step": 16497 }, { "epoch": 0.3015701829747564, "grad_norm": 6.906288848652323, "learning_rate": 8.187702658749966e-06, "loss": 17.544, "step": 16498 }, { "epoch": 0.30158846217120294, "grad_norm": 8.21743779413207, "learning_rate": 8.18747460089719e-06, "loss": 18.0989, "step": 16499 }, { "epoch": 0.3016067413676495, "grad_norm": 6.508468927991813, "learning_rate": 8.187246531872588e-06, "loss": 17.5793, "step": 16500 }, { "epoch": 0.301625020564096, "grad_norm": 6.812483683032258, "learning_rate": 8.187018451676967e-06, "loss": 17.7585, "step": 16501 }, { "epoch": 0.30164329976054255, "grad_norm": 7.712239528760078, "learning_rate": 8.186790360311123e-06, "loss": 18.1059, "step": 16502 }, { "epoch": 0.30166157895698903, "grad_norm": 5.874934679959864, "learning_rate": 8.186562257775853e-06, "loss": 17.2574, "step": 16503 }, { "epoch": 0.30167985815343556, "grad_norm": 6.555338602061579, "learning_rate": 8.18633414407196e-06, "loss": 17.6712, "step": 16504 }, { "epoch": 0.3016981373498821, "grad_norm": 7.227967719558234, "learning_rate": 8.186106019200242e-06, "loss": 17.7025, "step": 16505 }, { "epoch": 0.30171641654632864, "grad_norm": 5.869019190080841, "learning_rate": 8.185877883161499e-06, "loss": 17.3396, "step": 16506 }, { "epoch": 0.30173469574277517, "grad_norm": 7.133923522152701, "learning_rate": 8.185649735956532e-06, "loss": 17.9019, "step": 16507 }, { "epoch": 0.30175297493922165, "grad_norm": 7.245080198261911, "learning_rate": 8.185421577586136e-06, "loss": 17.7752, "step": 16508 }, { "epoch": 0.3017712541356682, "grad_norm": 4.810273064943183, "learning_rate": 8.185193408051117e-06, "loss": 16.9043, "step": 16509 }, { "epoch": 0.3017895333321147, "grad_norm": 7.7115591365479546, "learning_rate": 8.184965227352269e-06, "loss": 18.181, "step": 16510 }, { "epoch": 0.30180781252856126, "grad_norm": 8.173721821529089, "learning_rate": 8.184737035490395e-06, "loss": 18.4305, "step": 16511 }, { "epoch": 0.3018260917250078, "grad_norm": 7.775223499109105, "learning_rate": 8.184508832466296e-06, "loss": 17.6237, "step": 16512 }, { "epoch": 0.3018443709214543, "grad_norm": 6.893432384644545, "learning_rate": 8.184280618280767e-06, "loss": 17.5622, "step": 16513 }, { "epoch": 0.3018626501179008, "grad_norm": 6.232925317226751, "learning_rate": 8.184052392934612e-06, "loss": 17.6704, "step": 16514 }, { "epoch": 0.30188092931434735, "grad_norm": 6.098056554058219, "learning_rate": 8.18382415642863e-06, "loss": 17.7114, "step": 16515 }, { "epoch": 0.3018992085107939, "grad_norm": 6.588516230814564, "learning_rate": 8.183595908763621e-06, "loss": 17.5474, "step": 16516 }, { "epoch": 0.3019174877072404, "grad_norm": 6.028432713556956, "learning_rate": 8.183367649940383e-06, "loss": 17.2659, "step": 16517 }, { "epoch": 0.3019357669036869, "grad_norm": 6.720741996752916, "learning_rate": 8.18313937995972e-06, "loss": 17.7225, "step": 16518 }, { "epoch": 0.30195404610013343, "grad_norm": 7.538141356641681, "learning_rate": 8.182911098822429e-06, "loss": 17.6945, "step": 16519 }, { "epoch": 0.30197232529657997, "grad_norm": 7.364066713971182, "learning_rate": 8.182682806529308e-06, "loss": 17.9775, "step": 16520 }, { "epoch": 0.3019906044930265, "grad_norm": 5.94866339604873, "learning_rate": 8.182454503081163e-06, "loss": 17.3491, "step": 16521 }, { "epoch": 0.302008883689473, "grad_norm": 6.276540962611812, "learning_rate": 8.182226188478789e-06, "loss": 17.4954, "step": 16522 }, { "epoch": 0.3020271628859195, "grad_norm": 7.731017932608166, "learning_rate": 8.18199786272299e-06, "loss": 18.1973, "step": 16523 }, { "epoch": 0.30204544208236606, "grad_norm": 7.919062207011995, "learning_rate": 8.181769525814564e-06, "loss": 18.2331, "step": 16524 }, { "epoch": 0.3020637212788126, "grad_norm": 5.786780000985477, "learning_rate": 8.181541177754313e-06, "loss": 17.3974, "step": 16525 }, { "epoch": 0.3020820004752591, "grad_norm": 4.780131828598741, "learning_rate": 8.181312818543035e-06, "loss": 16.9365, "step": 16526 }, { "epoch": 0.3021002796717056, "grad_norm": 6.2531493406756935, "learning_rate": 8.18108444818153e-06, "loss": 17.4062, "step": 16527 }, { "epoch": 0.30211855886815214, "grad_norm": 7.22452000001973, "learning_rate": 8.180856066670601e-06, "loss": 17.8213, "step": 16528 }, { "epoch": 0.3021368380645987, "grad_norm": 7.399980142162305, "learning_rate": 8.18062767401105e-06, "loss": 17.8293, "step": 16529 }, { "epoch": 0.3021551172610452, "grad_norm": 6.711907461664344, "learning_rate": 8.180399270203674e-06, "loss": 17.6821, "step": 16530 }, { "epoch": 0.30217339645749175, "grad_norm": 6.115761496396272, "learning_rate": 8.180170855249273e-06, "loss": 17.4398, "step": 16531 }, { "epoch": 0.30219167565393823, "grad_norm": 6.0219889397981445, "learning_rate": 8.17994242914865e-06, "loss": 17.4822, "step": 16532 }, { "epoch": 0.30220995485038477, "grad_norm": 7.016598860551536, "learning_rate": 8.179713991902604e-06, "loss": 17.8632, "step": 16533 }, { "epoch": 0.3022282340468313, "grad_norm": 6.277178289562638, "learning_rate": 8.179485543511937e-06, "loss": 17.5471, "step": 16534 }, { "epoch": 0.30224651324327784, "grad_norm": 6.154732914126383, "learning_rate": 8.17925708397745e-06, "loss": 17.2966, "step": 16535 }, { "epoch": 0.3022647924397244, "grad_norm": 5.984522907203976, "learning_rate": 8.179028613299942e-06, "loss": 17.2695, "step": 16536 }, { "epoch": 0.30228307163617085, "grad_norm": 6.073259677347897, "learning_rate": 8.178800131480215e-06, "loss": 17.3777, "step": 16537 }, { "epoch": 0.3023013508326174, "grad_norm": 6.778560795947439, "learning_rate": 8.17857163851907e-06, "loss": 17.6691, "step": 16538 }, { "epoch": 0.3023196300290639, "grad_norm": 5.072466790177338, "learning_rate": 8.178343134417305e-06, "loss": 17.0627, "step": 16539 }, { "epoch": 0.30233790922551046, "grad_norm": 6.49635799588229, "learning_rate": 8.178114619175725e-06, "loss": 17.7353, "step": 16540 }, { "epoch": 0.302356188421957, "grad_norm": 6.225165267747494, "learning_rate": 8.17788609279513e-06, "loss": 17.3818, "step": 16541 }, { "epoch": 0.3023744676184035, "grad_norm": 5.528245605871067, "learning_rate": 8.177657555276316e-06, "loss": 17.1716, "step": 16542 }, { "epoch": 0.30239274681485, "grad_norm": 7.403188748676723, "learning_rate": 8.17742900662009e-06, "loss": 18.0589, "step": 16543 }, { "epoch": 0.30241102601129655, "grad_norm": 7.47989327310321, "learning_rate": 8.177200446827253e-06, "loss": 18.2505, "step": 16544 }, { "epoch": 0.3024293052077431, "grad_norm": 6.608004154482326, "learning_rate": 8.176971875898602e-06, "loss": 17.7651, "step": 16545 }, { "epoch": 0.3024475844041896, "grad_norm": 6.595024618848987, "learning_rate": 8.176743293834942e-06, "loss": 17.5882, "step": 16546 }, { "epoch": 0.3024658636006361, "grad_norm": 6.83054755805045, "learning_rate": 8.17651470063707e-06, "loss": 17.783, "step": 16547 }, { "epoch": 0.30248414279708263, "grad_norm": 6.688101712968924, "learning_rate": 8.176286096305791e-06, "loss": 17.5782, "step": 16548 }, { "epoch": 0.30250242199352917, "grad_norm": 7.122155042754724, "learning_rate": 8.176057480841905e-06, "loss": 17.7173, "step": 16549 }, { "epoch": 0.3025207011899757, "grad_norm": 5.9652240692357, "learning_rate": 8.175828854246213e-06, "loss": 17.0419, "step": 16550 }, { "epoch": 0.30253898038642224, "grad_norm": 6.229676225581576, "learning_rate": 8.175600216519518e-06, "loss": 17.3498, "step": 16551 }, { "epoch": 0.3025572595828687, "grad_norm": 6.822616507251027, "learning_rate": 8.175371567662617e-06, "loss": 17.6579, "step": 16552 }, { "epoch": 0.30257553877931526, "grad_norm": 6.216293568132132, "learning_rate": 8.175142907676314e-06, "loss": 17.3725, "step": 16553 }, { "epoch": 0.3025938179757618, "grad_norm": 6.257030541385044, "learning_rate": 8.174914236561413e-06, "loss": 17.5881, "step": 16554 }, { "epoch": 0.3026120971722083, "grad_norm": 6.7557453311657545, "learning_rate": 8.17468555431871e-06, "loss": 17.7954, "step": 16555 }, { "epoch": 0.3026303763686548, "grad_norm": 7.772523328786312, "learning_rate": 8.174456860949013e-06, "loss": 18.0306, "step": 16556 }, { "epoch": 0.30264865556510134, "grad_norm": 5.725160385162716, "learning_rate": 8.174228156453118e-06, "loss": 17.2693, "step": 16557 }, { "epoch": 0.3026669347615479, "grad_norm": 6.027807594635172, "learning_rate": 8.173999440831832e-06, "loss": 17.1753, "step": 16558 }, { "epoch": 0.3026852139579944, "grad_norm": 6.579440995997506, "learning_rate": 8.17377071408595e-06, "loss": 17.7444, "step": 16559 }, { "epoch": 0.30270349315444095, "grad_norm": 7.149903712173893, "learning_rate": 8.173541976216278e-06, "loss": 17.7759, "step": 16560 }, { "epoch": 0.30272177235088743, "grad_norm": 8.21693445222621, "learning_rate": 8.173313227223618e-06, "loss": 18.1841, "step": 16561 }, { "epoch": 0.30274005154733397, "grad_norm": 5.965184469782462, "learning_rate": 8.173084467108768e-06, "loss": 17.167, "step": 16562 }, { "epoch": 0.3027583307437805, "grad_norm": 6.5322324471294095, "learning_rate": 8.172855695872535e-06, "loss": 17.8392, "step": 16563 }, { "epoch": 0.30277660994022704, "grad_norm": 6.699074543118785, "learning_rate": 8.172626913515716e-06, "loss": 17.4916, "step": 16564 }, { "epoch": 0.3027948891366736, "grad_norm": 7.690518849392542, "learning_rate": 8.172398120039115e-06, "loss": 18.1277, "step": 16565 }, { "epoch": 0.30281316833312005, "grad_norm": 6.593627364055679, "learning_rate": 8.172169315443536e-06, "loss": 17.7265, "step": 16566 }, { "epoch": 0.3028314475295666, "grad_norm": 7.555344403148906, "learning_rate": 8.171940499729776e-06, "loss": 17.5391, "step": 16567 }, { "epoch": 0.3028497267260131, "grad_norm": 6.291748527130846, "learning_rate": 8.171711672898642e-06, "loss": 17.4901, "step": 16568 }, { "epoch": 0.30286800592245966, "grad_norm": 6.284059474720727, "learning_rate": 8.171482834950932e-06, "loss": 17.5326, "step": 16569 }, { "epoch": 0.3028862851189062, "grad_norm": 6.193792799554601, "learning_rate": 8.171253985887452e-06, "loss": 17.4754, "step": 16570 }, { "epoch": 0.3029045643153527, "grad_norm": 5.946877100607961, "learning_rate": 8.171025125709002e-06, "loss": 17.2342, "step": 16571 }, { "epoch": 0.3029228435117992, "grad_norm": 6.540139571152314, "learning_rate": 8.170796254416382e-06, "loss": 17.6354, "step": 16572 }, { "epoch": 0.30294112270824575, "grad_norm": 7.6721978967782665, "learning_rate": 8.170567372010396e-06, "loss": 18.2392, "step": 16573 }, { "epoch": 0.3029594019046923, "grad_norm": 7.625256006654974, "learning_rate": 8.170338478491849e-06, "loss": 17.9819, "step": 16574 }, { "epoch": 0.3029776811011388, "grad_norm": 6.890673777461589, "learning_rate": 8.17010957386154e-06, "loss": 17.7003, "step": 16575 }, { "epoch": 0.3029959602975853, "grad_norm": 5.740488014371495, "learning_rate": 8.169880658120271e-06, "loss": 17.1102, "step": 16576 }, { "epoch": 0.30301423949403183, "grad_norm": 6.974765187209739, "learning_rate": 8.169651731268846e-06, "loss": 17.6724, "step": 16577 }, { "epoch": 0.30303251869047837, "grad_norm": 7.644279023603949, "learning_rate": 8.169422793308067e-06, "loss": 18.1045, "step": 16578 }, { "epoch": 0.3030507978869249, "grad_norm": 6.8224864213141005, "learning_rate": 8.169193844238735e-06, "loss": 18.0005, "step": 16579 }, { "epoch": 0.30306907708337144, "grad_norm": 7.407862070402675, "learning_rate": 8.168964884061654e-06, "loss": 18.038, "step": 16580 }, { "epoch": 0.3030873562798179, "grad_norm": 7.445578759311784, "learning_rate": 8.168735912777626e-06, "loss": 17.8016, "step": 16581 }, { "epoch": 0.30310563547626446, "grad_norm": 7.453146616106986, "learning_rate": 8.168506930387455e-06, "loss": 17.6925, "step": 16582 }, { "epoch": 0.303123914672711, "grad_norm": 6.647364772448283, "learning_rate": 8.16827793689194e-06, "loss": 17.9154, "step": 16583 }, { "epoch": 0.30314219386915753, "grad_norm": 6.6805973224818604, "learning_rate": 8.168048932291887e-06, "loss": 17.7983, "step": 16584 }, { "epoch": 0.30316047306560406, "grad_norm": 6.81124673399441, "learning_rate": 8.167819916588098e-06, "loss": 17.4131, "step": 16585 }, { "epoch": 0.30317875226205054, "grad_norm": 5.71800434990808, "learning_rate": 8.167590889781374e-06, "loss": 17.193, "step": 16586 }, { "epoch": 0.3031970314584971, "grad_norm": 6.357336002746887, "learning_rate": 8.16736185187252e-06, "loss": 17.6707, "step": 16587 }, { "epoch": 0.3032153106549436, "grad_norm": 6.7051551075638995, "learning_rate": 8.167132802862337e-06, "loss": 17.7506, "step": 16588 }, { "epoch": 0.30323358985139015, "grad_norm": 6.0656603212703715, "learning_rate": 8.166903742751629e-06, "loss": 17.3446, "step": 16589 }, { "epoch": 0.30325186904783663, "grad_norm": 6.234430686785868, "learning_rate": 8.166674671541197e-06, "loss": 17.5529, "step": 16590 }, { "epoch": 0.30327014824428317, "grad_norm": 6.07744793907776, "learning_rate": 8.166445589231844e-06, "loss": 17.3579, "step": 16591 }, { "epoch": 0.3032884274407297, "grad_norm": 7.458944463186073, "learning_rate": 8.166216495824377e-06, "loss": 18.1661, "step": 16592 }, { "epoch": 0.30330670663717624, "grad_norm": 7.237524505725243, "learning_rate": 8.165987391319595e-06, "loss": 17.9962, "step": 16593 }, { "epoch": 0.3033249858336228, "grad_norm": 6.211035357940235, "learning_rate": 8.165758275718299e-06, "loss": 17.4706, "step": 16594 }, { "epoch": 0.30334326503006925, "grad_norm": 6.169519490500913, "learning_rate": 8.1655291490213e-06, "loss": 17.4826, "step": 16595 }, { "epoch": 0.3033615442265158, "grad_norm": 7.016402761120805, "learning_rate": 8.165300011229391e-06, "loss": 17.6625, "step": 16596 }, { "epoch": 0.3033798234229623, "grad_norm": 7.2425316947153355, "learning_rate": 8.165070862343383e-06, "loss": 17.8298, "step": 16597 }, { "epoch": 0.30339810261940886, "grad_norm": 5.969118681307772, "learning_rate": 8.164841702364074e-06, "loss": 17.277, "step": 16598 }, { "epoch": 0.3034163818158554, "grad_norm": 6.9113068676432805, "learning_rate": 8.164612531292272e-06, "loss": 17.8815, "step": 16599 }, { "epoch": 0.3034346610123019, "grad_norm": 6.00061687960561, "learning_rate": 8.164383349128778e-06, "loss": 17.2717, "step": 16600 }, { "epoch": 0.3034529402087484, "grad_norm": 6.190804065741893, "learning_rate": 8.164154155874392e-06, "loss": 17.3708, "step": 16601 }, { "epoch": 0.30347121940519495, "grad_norm": 5.90428780466989, "learning_rate": 8.163924951529922e-06, "loss": 17.2192, "step": 16602 }, { "epoch": 0.3034894986016415, "grad_norm": 8.428949101455649, "learning_rate": 8.16369573609617e-06, "loss": 18.2044, "step": 16603 }, { "epoch": 0.303507777798088, "grad_norm": 6.087154641374202, "learning_rate": 8.163466509573938e-06, "loss": 17.4781, "step": 16604 }, { "epoch": 0.3035260569945345, "grad_norm": 5.548964191141877, "learning_rate": 8.163237271964032e-06, "loss": 17.1363, "step": 16605 }, { "epoch": 0.30354433619098103, "grad_norm": 6.528545508294717, "learning_rate": 8.163008023267253e-06, "loss": 17.6114, "step": 16606 }, { "epoch": 0.30356261538742757, "grad_norm": 5.317325238136297, "learning_rate": 8.162778763484405e-06, "loss": 16.9295, "step": 16607 }, { "epoch": 0.3035808945838741, "grad_norm": 6.628272614822982, "learning_rate": 8.162549492616292e-06, "loss": 17.7807, "step": 16608 }, { "epoch": 0.30359917378032064, "grad_norm": 6.85699704842018, "learning_rate": 8.162320210663717e-06, "loss": 17.6457, "step": 16609 }, { "epoch": 0.3036174529767671, "grad_norm": 6.036235766142873, "learning_rate": 8.162090917627486e-06, "loss": 17.0974, "step": 16610 }, { "epoch": 0.30363573217321366, "grad_norm": 6.257521317624505, "learning_rate": 8.161861613508399e-06, "loss": 17.6131, "step": 16611 }, { "epoch": 0.3036540113696602, "grad_norm": 7.3301207660809515, "learning_rate": 8.161632298307261e-06, "loss": 17.7923, "step": 16612 }, { "epoch": 0.30367229056610673, "grad_norm": 8.590125791099593, "learning_rate": 8.161402972024876e-06, "loss": 18.2708, "step": 16613 }, { "epoch": 0.30369056976255326, "grad_norm": 6.756733202026823, "learning_rate": 8.16117363466205e-06, "loss": 17.8269, "step": 16614 }, { "epoch": 0.30370884895899974, "grad_norm": 7.909718751814012, "learning_rate": 8.160944286219582e-06, "loss": 17.9376, "step": 16615 }, { "epoch": 0.3037271281554463, "grad_norm": 5.841152948775874, "learning_rate": 8.160714926698281e-06, "loss": 17.2749, "step": 16616 }, { "epoch": 0.3037454073518928, "grad_norm": 6.303060380406051, "learning_rate": 8.160485556098948e-06, "loss": 17.6905, "step": 16617 }, { "epoch": 0.30376368654833935, "grad_norm": 7.849903925154078, "learning_rate": 8.160256174422387e-06, "loss": 18.0157, "step": 16618 }, { "epoch": 0.3037819657447859, "grad_norm": 6.219801675497526, "learning_rate": 8.160026781669401e-06, "loss": 17.5778, "step": 16619 }, { "epoch": 0.30380024494123237, "grad_norm": 6.340471740537694, "learning_rate": 8.159797377840799e-06, "loss": 17.2766, "step": 16620 }, { "epoch": 0.3038185241376789, "grad_norm": 6.8026489423000065, "learning_rate": 8.159567962937379e-06, "loss": 17.823, "step": 16621 }, { "epoch": 0.30383680333412544, "grad_norm": 7.274009078158674, "learning_rate": 8.159338536959946e-06, "loss": 18.1152, "step": 16622 }, { "epoch": 0.303855082530572, "grad_norm": 8.867369198726927, "learning_rate": 8.159109099909309e-06, "loss": 18.6322, "step": 16623 }, { "epoch": 0.30387336172701845, "grad_norm": 7.441710985970655, "learning_rate": 8.158879651786266e-06, "loss": 18.0164, "step": 16624 }, { "epoch": 0.303891640923465, "grad_norm": 5.665058720299332, "learning_rate": 8.158650192591625e-06, "loss": 17.2601, "step": 16625 }, { "epoch": 0.3039099201199115, "grad_norm": 5.330355568794583, "learning_rate": 8.158420722326188e-06, "loss": 17.0789, "step": 16626 }, { "epoch": 0.30392819931635806, "grad_norm": 6.799221602475565, "learning_rate": 8.158191240990761e-06, "loss": 17.7676, "step": 16627 }, { "epoch": 0.3039464785128046, "grad_norm": 7.771883530264951, "learning_rate": 8.157961748586149e-06, "loss": 17.9648, "step": 16628 }, { "epoch": 0.3039647577092511, "grad_norm": 5.87215653021683, "learning_rate": 8.157732245113153e-06, "loss": 17.3832, "step": 16629 }, { "epoch": 0.3039830369056976, "grad_norm": 7.254285161872083, "learning_rate": 8.157502730572581e-06, "loss": 17.9605, "step": 16630 }, { "epoch": 0.30400131610214415, "grad_norm": 5.982002777119766, "learning_rate": 8.157273204965238e-06, "loss": 17.2095, "step": 16631 }, { "epoch": 0.3040195952985907, "grad_norm": 5.96001871350836, "learning_rate": 8.157043668291922e-06, "loss": 17.3055, "step": 16632 }, { "epoch": 0.3040378744950372, "grad_norm": 6.435863027618191, "learning_rate": 8.156814120553445e-06, "loss": 17.7032, "step": 16633 }, { "epoch": 0.3040561536914837, "grad_norm": 7.57322633761336, "learning_rate": 8.156584561750606e-06, "loss": 18.0216, "step": 16634 }, { "epoch": 0.30407443288793023, "grad_norm": 6.696133293513736, "learning_rate": 8.156354991884214e-06, "loss": 17.692, "step": 16635 }, { "epoch": 0.30409271208437677, "grad_norm": 8.727030605321485, "learning_rate": 8.156125410955071e-06, "loss": 18.3051, "step": 16636 }, { "epoch": 0.3041109912808233, "grad_norm": 6.733621858197244, "learning_rate": 8.155895818963982e-06, "loss": 17.498, "step": 16637 }, { "epoch": 0.30412927047726984, "grad_norm": 5.976496155872423, "learning_rate": 8.155666215911754e-06, "loss": 17.279, "step": 16638 }, { "epoch": 0.3041475496737163, "grad_norm": 6.118867408043492, "learning_rate": 8.155436601799187e-06, "loss": 17.4757, "step": 16639 }, { "epoch": 0.30416582887016286, "grad_norm": 5.735956527341406, "learning_rate": 8.15520697662709e-06, "loss": 17.1856, "step": 16640 }, { "epoch": 0.3041841080666094, "grad_norm": 7.487874252516413, "learning_rate": 8.154977340396264e-06, "loss": 18.0411, "step": 16641 }, { "epoch": 0.30420238726305593, "grad_norm": 7.761509509625328, "learning_rate": 8.154747693107518e-06, "loss": 18.2016, "step": 16642 }, { "epoch": 0.30422066645950246, "grad_norm": 6.191408684603873, "learning_rate": 8.154518034761657e-06, "loss": 17.5018, "step": 16643 }, { "epoch": 0.30423894565594894, "grad_norm": 7.728201685651844, "learning_rate": 8.154288365359483e-06, "loss": 17.9976, "step": 16644 }, { "epoch": 0.3042572248523955, "grad_norm": 5.864159128704303, "learning_rate": 8.1540586849018e-06, "loss": 17.3021, "step": 16645 }, { "epoch": 0.304275504048842, "grad_norm": 5.7828330681318105, "learning_rate": 8.153828993389417e-06, "loss": 17.1839, "step": 16646 }, { "epoch": 0.30429378324528855, "grad_norm": 6.2154094997417895, "learning_rate": 8.153599290823136e-06, "loss": 17.5577, "step": 16647 }, { "epoch": 0.3043120624417351, "grad_norm": 5.964364786629598, "learning_rate": 8.153369577203764e-06, "loss": 17.4375, "step": 16648 }, { "epoch": 0.30433034163818157, "grad_norm": 6.622789388812934, "learning_rate": 8.153139852532104e-06, "loss": 17.3699, "step": 16649 }, { "epoch": 0.3043486208346281, "grad_norm": 8.46486387940904, "learning_rate": 8.152910116808962e-06, "loss": 18.9387, "step": 16650 }, { "epoch": 0.30436690003107464, "grad_norm": 6.588297848353495, "learning_rate": 8.152680370035146e-06, "loss": 17.6354, "step": 16651 }, { "epoch": 0.3043851792275212, "grad_norm": 7.363797111774982, "learning_rate": 8.152450612211457e-06, "loss": 17.824, "step": 16652 }, { "epoch": 0.3044034584239677, "grad_norm": 7.877980197925042, "learning_rate": 8.152220843338704e-06, "loss": 18.4074, "step": 16653 }, { "epoch": 0.3044217376204142, "grad_norm": 5.609245018958935, "learning_rate": 8.15199106341769e-06, "loss": 17.25, "step": 16654 }, { "epoch": 0.3044400168168607, "grad_norm": 6.194752741290725, "learning_rate": 8.151761272449219e-06, "loss": 17.7048, "step": 16655 }, { "epoch": 0.30445829601330726, "grad_norm": 5.858072727717588, "learning_rate": 8.151531470434099e-06, "loss": 17.364, "step": 16656 }, { "epoch": 0.3044765752097538, "grad_norm": 7.032068695583198, "learning_rate": 8.151301657373136e-06, "loss": 17.9112, "step": 16657 }, { "epoch": 0.3044948544062003, "grad_norm": 5.809396707838711, "learning_rate": 8.151071833267135e-06, "loss": 17.2292, "step": 16658 }, { "epoch": 0.3045131336026468, "grad_norm": 6.118237197029155, "learning_rate": 8.150841998116898e-06, "loss": 17.5482, "step": 16659 }, { "epoch": 0.30453141279909335, "grad_norm": 11.032475752883897, "learning_rate": 8.150612151923234e-06, "loss": 18.1286, "step": 16660 }, { "epoch": 0.3045496919955399, "grad_norm": 5.7063388817630285, "learning_rate": 8.150382294686948e-06, "loss": 17.4102, "step": 16661 }, { "epoch": 0.3045679711919864, "grad_norm": 6.531306214638482, "learning_rate": 8.150152426408845e-06, "loss": 17.4352, "step": 16662 }, { "epoch": 0.3045862503884329, "grad_norm": 5.562829033755462, "learning_rate": 8.14992254708973e-06, "loss": 17.2006, "step": 16663 }, { "epoch": 0.30460452958487944, "grad_norm": 6.371054500796451, "learning_rate": 8.149692656730413e-06, "loss": 17.6054, "step": 16664 }, { "epoch": 0.30462280878132597, "grad_norm": 7.696357414683173, "learning_rate": 8.149462755331695e-06, "loss": 18.2222, "step": 16665 }, { "epoch": 0.3046410879777725, "grad_norm": 6.145415972817905, "learning_rate": 8.149232842894384e-06, "loss": 17.449, "step": 16666 }, { "epoch": 0.30465936717421904, "grad_norm": 7.071202653939831, "learning_rate": 8.149002919419282e-06, "loss": 18.3085, "step": 16667 }, { "epoch": 0.3046776463706655, "grad_norm": 7.551379793858554, "learning_rate": 8.148772984907203e-06, "loss": 17.9634, "step": 16668 }, { "epoch": 0.30469592556711206, "grad_norm": 8.186109212308564, "learning_rate": 8.148543039358944e-06, "loss": 18.2248, "step": 16669 }, { "epoch": 0.3047142047635586, "grad_norm": 6.466871249323418, "learning_rate": 8.148313082775316e-06, "loss": 17.5463, "step": 16670 }, { "epoch": 0.30473248396000513, "grad_norm": 6.293822843931664, "learning_rate": 8.148083115157124e-06, "loss": 17.3767, "step": 16671 }, { "epoch": 0.30475076315645167, "grad_norm": 6.636580017643419, "learning_rate": 8.147853136505175e-06, "loss": 17.6899, "step": 16672 }, { "epoch": 0.30476904235289815, "grad_norm": 6.777519905813917, "learning_rate": 8.147623146820272e-06, "loss": 17.7548, "step": 16673 }, { "epoch": 0.3047873215493447, "grad_norm": 7.441594029204132, "learning_rate": 8.147393146103224e-06, "loss": 17.8708, "step": 16674 }, { "epoch": 0.3048056007457912, "grad_norm": 6.356874956270289, "learning_rate": 8.147163134354836e-06, "loss": 17.5734, "step": 16675 }, { "epoch": 0.30482387994223775, "grad_norm": 7.140009436584371, "learning_rate": 8.146933111575915e-06, "loss": 17.7291, "step": 16676 }, { "epoch": 0.3048421591386843, "grad_norm": 6.992353068374214, "learning_rate": 8.146703077767265e-06, "loss": 17.9955, "step": 16677 }, { "epoch": 0.30486043833513077, "grad_norm": 6.299840670557331, "learning_rate": 8.146473032929693e-06, "loss": 17.4187, "step": 16678 }, { "epoch": 0.3048787175315773, "grad_norm": 5.890386436433404, "learning_rate": 8.146242977064009e-06, "loss": 17.3146, "step": 16679 }, { "epoch": 0.30489699672802384, "grad_norm": 7.095378126234456, "learning_rate": 8.146012910171014e-06, "loss": 17.591, "step": 16680 }, { "epoch": 0.3049152759244704, "grad_norm": 5.414916195517052, "learning_rate": 8.14578283225152e-06, "loss": 17.2094, "step": 16681 }, { "epoch": 0.3049335551209169, "grad_norm": 8.092887418734362, "learning_rate": 8.145552743306327e-06, "loss": 17.7922, "step": 16682 }, { "epoch": 0.3049518343173634, "grad_norm": 7.124788941677222, "learning_rate": 8.145322643336245e-06, "loss": 17.6115, "step": 16683 }, { "epoch": 0.3049701135138099, "grad_norm": 6.079544236882442, "learning_rate": 8.14509253234208e-06, "loss": 17.7165, "step": 16684 }, { "epoch": 0.30498839271025646, "grad_norm": 7.861638541511168, "learning_rate": 8.14486241032464e-06, "loss": 17.9554, "step": 16685 }, { "epoch": 0.305006671906703, "grad_norm": 7.415013676567563, "learning_rate": 8.14463227728473e-06, "loss": 17.8059, "step": 16686 }, { "epoch": 0.30502495110314953, "grad_norm": 6.172542416710036, "learning_rate": 8.144402133223155e-06, "loss": 17.3136, "step": 16687 }, { "epoch": 0.305043230299596, "grad_norm": 7.380340395382709, "learning_rate": 8.144171978140725e-06, "loss": 17.1493, "step": 16688 }, { "epoch": 0.30506150949604255, "grad_norm": 6.811544579927895, "learning_rate": 8.143941812038244e-06, "loss": 17.8914, "step": 16689 }, { "epoch": 0.3050797886924891, "grad_norm": 6.0034266004955095, "learning_rate": 8.14371163491652e-06, "loss": 17.4623, "step": 16690 }, { "epoch": 0.3050980678889356, "grad_norm": 6.050861975044013, "learning_rate": 8.14348144677636e-06, "loss": 17.5379, "step": 16691 }, { "epoch": 0.3051163470853821, "grad_norm": 5.647014412074355, "learning_rate": 8.14325124761857e-06, "loss": 17.3413, "step": 16692 }, { "epoch": 0.30513462628182864, "grad_norm": 7.545296878365928, "learning_rate": 8.143021037443956e-06, "loss": 17.9563, "step": 16693 }, { "epoch": 0.30515290547827517, "grad_norm": 6.106851345102743, "learning_rate": 8.142790816253327e-06, "loss": 17.6058, "step": 16694 }, { "epoch": 0.3051711846747217, "grad_norm": 6.0532280414861015, "learning_rate": 8.14256058404749e-06, "loss": 17.3859, "step": 16695 }, { "epoch": 0.30518946387116824, "grad_norm": 5.322284477396805, "learning_rate": 8.14233034082725e-06, "loss": 17.4024, "step": 16696 }, { "epoch": 0.3052077430676147, "grad_norm": 8.295172495342939, "learning_rate": 8.142100086593414e-06, "loss": 18.5029, "step": 16697 }, { "epoch": 0.30522602226406126, "grad_norm": 8.014620110297049, "learning_rate": 8.141869821346791e-06, "loss": 18.4967, "step": 16698 }, { "epoch": 0.3052443014605078, "grad_norm": 7.109719991357483, "learning_rate": 8.141639545088189e-06, "loss": 17.7804, "step": 16699 }, { "epoch": 0.30526258065695433, "grad_norm": 7.570967708371013, "learning_rate": 8.141409257818409e-06, "loss": 17.7589, "step": 16700 }, { "epoch": 0.30528085985340087, "grad_norm": 5.743573043057227, "learning_rate": 8.141178959538263e-06, "loss": 17.3379, "step": 16701 }, { "epoch": 0.30529913904984735, "grad_norm": 5.60616088144606, "learning_rate": 8.140948650248559e-06, "loss": 17.2385, "step": 16702 }, { "epoch": 0.3053174182462939, "grad_norm": 5.142308309232406, "learning_rate": 8.140718329950101e-06, "loss": 17.0075, "step": 16703 }, { "epoch": 0.3053356974427404, "grad_norm": 6.547997382321389, "learning_rate": 8.140487998643699e-06, "loss": 17.8636, "step": 16704 }, { "epoch": 0.30535397663918695, "grad_norm": 5.658739944947381, "learning_rate": 8.140257656330159e-06, "loss": 17.2068, "step": 16705 }, { "epoch": 0.3053722558356335, "grad_norm": 6.732443229239918, "learning_rate": 8.140027303010288e-06, "loss": 17.6035, "step": 16706 }, { "epoch": 0.30539053503207997, "grad_norm": 7.543038726133314, "learning_rate": 8.139796938684892e-06, "loss": 18.0054, "step": 16707 }, { "epoch": 0.3054088142285265, "grad_norm": 5.737792449591821, "learning_rate": 8.139566563354782e-06, "loss": 17.1012, "step": 16708 }, { "epoch": 0.30542709342497304, "grad_norm": 5.283208433592343, "learning_rate": 8.139336177020765e-06, "loss": 17.0553, "step": 16709 }, { "epoch": 0.3054453726214196, "grad_norm": 4.807319283842524, "learning_rate": 8.139105779683645e-06, "loss": 16.9905, "step": 16710 }, { "epoch": 0.3054636518178661, "grad_norm": 7.485844959479744, "learning_rate": 8.138875371344232e-06, "loss": 17.9085, "step": 16711 }, { "epoch": 0.3054819310143126, "grad_norm": 5.759929392525538, "learning_rate": 8.138644952003334e-06, "loss": 17.2418, "step": 16712 }, { "epoch": 0.3055002102107591, "grad_norm": 6.841986283055872, "learning_rate": 8.138414521661758e-06, "loss": 17.6657, "step": 16713 }, { "epoch": 0.30551848940720566, "grad_norm": 7.699958157519718, "learning_rate": 8.13818408032031e-06, "loss": 18.2383, "step": 16714 }, { "epoch": 0.3055367686036522, "grad_norm": 5.396068550385644, "learning_rate": 8.1379536279798e-06, "loss": 17.256, "step": 16715 }, { "epoch": 0.30555504780009873, "grad_norm": 7.920276591533406, "learning_rate": 8.137723164641034e-06, "loss": 18.6347, "step": 16716 }, { "epoch": 0.3055733269965452, "grad_norm": 7.267905648980075, "learning_rate": 8.137492690304823e-06, "loss": 17.924, "step": 16717 }, { "epoch": 0.30559160619299175, "grad_norm": 5.882535555137481, "learning_rate": 8.13726220497197e-06, "loss": 17.3439, "step": 16718 }, { "epoch": 0.3056098853894383, "grad_norm": 7.564212895553327, "learning_rate": 8.137031708643283e-06, "loss": 17.7634, "step": 16719 }, { "epoch": 0.3056281645858848, "grad_norm": 8.278958609025626, "learning_rate": 8.136801201319578e-06, "loss": 17.9191, "step": 16720 }, { "epoch": 0.30564644378233136, "grad_norm": 5.837226528619861, "learning_rate": 8.136570683001652e-06, "loss": 17.2523, "step": 16721 }, { "epoch": 0.30566472297877784, "grad_norm": 7.32327415796826, "learning_rate": 8.136340153690321e-06, "loss": 17.8673, "step": 16722 }, { "epoch": 0.30568300217522437, "grad_norm": 10.108315129156512, "learning_rate": 8.13610961338639e-06, "loss": 18.5179, "step": 16723 }, { "epoch": 0.3057012813716709, "grad_norm": 6.933209439932622, "learning_rate": 8.135879062090663e-06, "loss": 17.9609, "step": 16724 }, { "epoch": 0.30571956056811744, "grad_norm": 7.230965566714261, "learning_rate": 8.135648499803956e-06, "loss": 17.7497, "step": 16725 }, { "epoch": 0.3057378397645639, "grad_norm": 6.254464458688985, "learning_rate": 8.135417926527072e-06, "loss": 17.5044, "step": 16726 }, { "epoch": 0.30575611896101046, "grad_norm": 6.747843528117833, "learning_rate": 8.135187342260819e-06, "loss": 17.9859, "step": 16727 }, { "epoch": 0.305774398157457, "grad_norm": 6.7380015745969875, "learning_rate": 8.134956747006009e-06, "loss": 17.922, "step": 16728 }, { "epoch": 0.30579267735390353, "grad_norm": 6.965720999503236, "learning_rate": 8.134726140763445e-06, "loss": 17.6706, "step": 16729 }, { "epoch": 0.30581095655035007, "grad_norm": 5.699536573914778, "learning_rate": 8.134495523533939e-06, "loss": 17.3331, "step": 16730 }, { "epoch": 0.30582923574679655, "grad_norm": 7.264395494959649, "learning_rate": 8.134264895318298e-06, "loss": 17.9404, "step": 16731 }, { "epoch": 0.3058475149432431, "grad_norm": 10.733387802977195, "learning_rate": 8.134034256117332e-06, "loss": 17.9636, "step": 16732 }, { "epoch": 0.3058657941396896, "grad_norm": 5.704574170589308, "learning_rate": 8.133803605931847e-06, "loss": 17.1899, "step": 16733 }, { "epoch": 0.30588407333613615, "grad_norm": 6.687750961798256, "learning_rate": 8.133572944762651e-06, "loss": 17.6118, "step": 16734 }, { "epoch": 0.3059023525325827, "grad_norm": 8.285488445118865, "learning_rate": 8.133342272610553e-06, "loss": 17.1306, "step": 16735 }, { "epoch": 0.30592063172902917, "grad_norm": 6.001814260037967, "learning_rate": 8.133111589476366e-06, "loss": 17.4356, "step": 16736 }, { "epoch": 0.3059389109254757, "grad_norm": 7.99769116871961, "learning_rate": 8.132880895360893e-06, "loss": 18.3703, "step": 16737 }, { "epoch": 0.30595719012192224, "grad_norm": 7.802336244065987, "learning_rate": 8.132650190264944e-06, "loss": 18.3243, "step": 16738 }, { "epoch": 0.3059754693183688, "grad_norm": 6.642160089090536, "learning_rate": 8.132419474189328e-06, "loss": 17.7333, "step": 16739 }, { "epoch": 0.3059937485148153, "grad_norm": 7.335260573506195, "learning_rate": 8.132188747134852e-06, "loss": 17.6557, "step": 16740 }, { "epoch": 0.3060120277112618, "grad_norm": 6.055280801158583, "learning_rate": 8.131958009102327e-06, "loss": 17.3898, "step": 16741 }, { "epoch": 0.3060303069077083, "grad_norm": 5.184181120365097, "learning_rate": 8.131727260092564e-06, "loss": 16.957, "step": 16742 }, { "epoch": 0.30604858610415486, "grad_norm": 7.012052898024959, "learning_rate": 8.131496500106366e-06, "loss": 18.0213, "step": 16743 }, { "epoch": 0.3060668653006014, "grad_norm": 7.012947203763594, "learning_rate": 8.131265729144544e-06, "loss": 17.6844, "step": 16744 }, { "epoch": 0.30608514449704793, "grad_norm": 6.594685830215474, "learning_rate": 8.131034947207909e-06, "loss": 17.6969, "step": 16745 }, { "epoch": 0.3061034236934944, "grad_norm": 7.3559014298241046, "learning_rate": 8.130804154297268e-06, "loss": 17.7295, "step": 16746 }, { "epoch": 0.30612170288994095, "grad_norm": 7.191641221787167, "learning_rate": 8.130573350413428e-06, "loss": 18.1313, "step": 16747 }, { "epoch": 0.3061399820863875, "grad_norm": 6.974589058037992, "learning_rate": 8.130342535557202e-06, "loss": 17.8844, "step": 16748 }, { "epoch": 0.306158261282834, "grad_norm": 6.325822738509391, "learning_rate": 8.130111709729396e-06, "loss": 17.3803, "step": 16749 }, { "epoch": 0.30617654047928056, "grad_norm": 5.97383920703589, "learning_rate": 8.129880872930822e-06, "loss": 17.5041, "step": 16750 }, { "epoch": 0.30619481967572704, "grad_norm": 5.784921630829452, "learning_rate": 8.129650025162285e-06, "loss": 17.3294, "step": 16751 }, { "epoch": 0.3062130988721736, "grad_norm": 5.714783397676278, "learning_rate": 8.129419166424597e-06, "loss": 17.3295, "step": 16752 }, { "epoch": 0.3062313780686201, "grad_norm": 5.472971448192495, "learning_rate": 8.129188296718566e-06, "loss": 17.1211, "step": 16753 }, { "epoch": 0.30624965726506664, "grad_norm": 8.371551177531051, "learning_rate": 8.128957416045003e-06, "loss": 17.8683, "step": 16754 }, { "epoch": 0.3062679364615132, "grad_norm": 6.703373792499423, "learning_rate": 8.128726524404715e-06, "loss": 17.5168, "step": 16755 }, { "epoch": 0.30628621565795966, "grad_norm": 6.402467016581645, "learning_rate": 8.128495621798511e-06, "loss": 17.5197, "step": 16756 }, { "epoch": 0.3063044948544062, "grad_norm": 6.957447476972262, "learning_rate": 8.128264708227203e-06, "loss": 17.8857, "step": 16757 }, { "epoch": 0.30632277405085273, "grad_norm": 7.614934722454362, "learning_rate": 8.128033783691598e-06, "loss": 18.1989, "step": 16758 }, { "epoch": 0.30634105324729927, "grad_norm": 6.2255533879289615, "learning_rate": 8.127802848192506e-06, "loss": 17.4933, "step": 16759 }, { "epoch": 0.30635933244374575, "grad_norm": 6.811878880010115, "learning_rate": 8.127571901730736e-06, "loss": 17.951, "step": 16760 }, { "epoch": 0.3063776116401923, "grad_norm": 6.885163504999443, "learning_rate": 8.127340944307099e-06, "loss": 17.8237, "step": 16761 }, { "epoch": 0.3063958908366388, "grad_norm": 6.169290540256493, "learning_rate": 8.127109975922402e-06, "loss": 17.4573, "step": 16762 }, { "epoch": 0.30641417003308535, "grad_norm": 6.963155307268487, "learning_rate": 8.126878996577456e-06, "loss": 17.8927, "step": 16763 }, { "epoch": 0.3064324492295319, "grad_norm": 7.110075754402128, "learning_rate": 8.12664800627307e-06, "loss": 18.0326, "step": 16764 }, { "epoch": 0.30645072842597837, "grad_norm": 7.334729002026811, "learning_rate": 8.126417005010056e-06, "loss": 17.9197, "step": 16765 }, { "epoch": 0.3064690076224249, "grad_norm": 5.815015540294881, "learning_rate": 8.12618599278922e-06, "loss": 17.5021, "step": 16766 }, { "epoch": 0.30648728681887144, "grad_norm": 6.733952977149314, "learning_rate": 8.125954969611373e-06, "loss": 17.5858, "step": 16767 }, { "epoch": 0.306505566015318, "grad_norm": 6.286434035949967, "learning_rate": 8.125723935477328e-06, "loss": 17.5503, "step": 16768 }, { "epoch": 0.3065238452117645, "grad_norm": 7.330028539524488, "learning_rate": 8.12549289038789e-06, "loss": 17.8128, "step": 16769 }, { "epoch": 0.306542124408211, "grad_norm": 6.148307060165565, "learning_rate": 8.12526183434387e-06, "loss": 17.4417, "step": 16770 }, { "epoch": 0.3065604036046575, "grad_norm": 6.539274763971553, "learning_rate": 8.125030767346081e-06, "loss": 17.5556, "step": 16771 }, { "epoch": 0.30657868280110406, "grad_norm": 7.796567227685249, "learning_rate": 8.124799689395328e-06, "loss": 18.2581, "step": 16772 }, { "epoch": 0.3065969619975506, "grad_norm": 6.881588784470505, "learning_rate": 8.124568600492421e-06, "loss": 17.586, "step": 16773 }, { "epoch": 0.30661524119399713, "grad_norm": 6.720588951388159, "learning_rate": 8.124337500638175e-06, "loss": 17.687, "step": 16774 }, { "epoch": 0.3066335203904436, "grad_norm": 6.651211194075465, "learning_rate": 8.124106389833397e-06, "loss": 17.6702, "step": 16775 }, { "epoch": 0.30665179958689015, "grad_norm": 5.27799086682439, "learning_rate": 8.123875268078898e-06, "loss": 16.9764, "step": 16776 }, { "epoch": 0.3066700787833367, "grad_norm": 5.655025221938781, "learning_rate": 8.123644135375487e-06, "loss": 17.2942, "step": 16777 }, { "epoch": 0.3066883579797832, "grad_norm": 9.086716992701747, "learning_rate": 8.123412991723975e-06, "loss": 18.2968, "step": 16778 }, { "epoch": 0.30670663717622976, "grad_norm": 7.865888714452238, "learning_rate": 8.123181837125169e-06, "loss": 18.2855, "step": 16779 }, { "epoch": 0.30672491637267624, "grad_norm": 6.064853050171648, "learning_rate": 8.122950671579884e-06, "loss": 17.5467, "step": 16780 }, { "epoch": 0.3067431955691228, "grad_norm": 5.548607974608034, "learning_rate": 8.122719495088926e-06, "loss": 17.1425, "step": 16781 }, { "epoch": 0.3067614747655693, "grad_norm": 5.511890499092476, "learning_rate": 8.12248830765311e-06, "loss": 17.3113, "step": 16782 }, { "epoch": 0.30677975396201584, "grad_norm": 5.745980862167241, "learning_rate": 8.12225710927324e-06, "loss": 17.3077, "step": 16783 }, { "epoch": 0.3067980331584624, "grad_norm": 6.277237563776308, "learning_rate": 8.12202589995013e-06, "loss": 17.7336, "step": 16784 }, { "epoch": 0.30681631235490886, "grad_norm": 6.482625000784745, "learning_rate": 8.121794679684593e-06, "loss": 17.507, "step": 16785 }, { "epoch": 0.3068345915513554, "grad_norm": 6.1322004915697095, "learning_rate": 8.121563448477434e-06, "loss": 17.5198, "step": 16786 }, { "epoch": 0.30685287074780193, "grad_norm": 7.408403820256594, "learning_rate": 8.121332206329468e-06, "loss": 17.9982, "step": 16787 }, { "epoch": 0.30687114994424847, "grad_norm": 6.359961007502059, "learning_rate": 8.121100953241501e-06, "loss": 17.3535, "step": 16788 }, { "epoch": 0.306889429140695, "grad_norm": 6.706687458532863, "learning_rate": 8.120869689214349e-06, "loss": 17.4008, "step": 16789 }, { "epoch": 0.3069077083371415, "grad_norm": 6.510804917349421, "learning_rate": 8.120638414248819e-06, "loss": 17.6156, "step": 16790 }, { "epoch": 0.306925987533588, "grad_norm": 6.008520202802392, "learning_rate": 8.12040712834572e-06, "loss": 17.4768, "step": 16791 }, { "epoch": 0.30694426673003455, "grad_norm": 6.809351771785792, "learning_rate": 8.120175831505865e-06, "loss": 17.5423, "step": 16792 }, { "epoch": 0.3069625459264811, "grad_norm": 8.908918303842555, "learning_rate": 8.119944523730065e-06, "loss": 18.2396, "step": 16793 }, { "epoch": 0.30698082512292757, "grad_norm": 6.16648566698405, "learning_rate": 8.119713205019131e-06, "loss": 17.4055, "step": 16794 }, { "epoch": 0.3069991043193741, "grad_norm": 6.690468500161972, "learning_rate": 8.119481875373874e-06, "loss": 17.5125, "step": 16795 }, { "epoch": 0.30701738351582064, "grad_norm": 7.690193087772361, "learning_rate": 8.1192505347951e-06, "loss": 18.2434, "step": 16796 }, { "epoch": 0.3070356627122672, "grad_norm": 6.21081997330213, "learning_rate": 8.119019183283627e-06, "loss": 17.406, "step": 16797 }, { "epoch": 0.3070539419087137, "grad_norm": 6.7458423446584534, "learning_rate": 8.118787820840261e-06, "loss": 17.6053, "step": 16798 }, { "epoch": 0.3070722211051602, "grad_norm": 6.222093098054909, "learning_rate": 8.118556447465815e-06, "loss": 17.4989, "step": 16799 }, { "epoch": 0.30709050030160673, "grad_norm": 7.178904037819462, "learning_rate": 8.118325063161099e-06, "loss": 17.8731, "step": 16800 }, { "epoch": 0.30710877949805326, "grad_norm": 7.297540371704384, "learning_rate": 8.118093667926923e-06, "loss": 17.6484, "step": 16801 }, { "epoch": 0.3071270586944998, "grad_norm": 6.5991204891772135, "learning_rate": 8.1178622617641e-06, "loss": 17.5603, "step": 16802 }, { "epoch": 0.30714533789094633, "grad_norm": 8.665284978980097, "learning_rate": 8.11763084467344e-06, "loss": 17.5106, "step": 16803 }, { "epoch": 0.3071636170873928, "grad_norm": 6.996091273311296, "learning_rate": 8.117399416655758e-06, "loss": 17.8823, "step": 16804 }, { "epoch": 0.30718189628383935, "grad_norm": 6.669947849826685, "learning_rate": 8.117167977711858e-06, "loss": 17.6966, "step": 16805 }, { "epoch": 0.3072001754802859, "grad_norm": 7.306246721884619, "learning_rate": 8.116936527842556e-06, "loss": 17.6553, "step": 16806 }, { "epoch": 0.3072184546767324, "grad_norm": 6.424525242590785, "learning_rate": 8.11670506704866e-06, "loss": 17.3905, "step": 16807 }, { "epoch": 0.30723673387317896, "grad_norm": 6.4793656402982025, "learning_rate": 8.116473595330985e-06, "loss": 17.6348, "step": 16808 }, { "epoch": 0.30725501306962544, "grad_norm": 7.7622202772727755, "learning_rate": 8.116242112690341e-06, "loss": 18.0591, "step": 16809 }, { "epoch": 0.307273292266072, "grad_norm": 5.689732872215455, "learning_rate": 8.116010619127537e-06, "loss": 17.0951, "step": 16810 }, { "epoch": 0.3072915714625185, "grad_norm": 7.211639722715736, "learning_rate": 8.115779114643386e-06, "loss": 17.9286, "step": 16811 }, { "epoch": 0.30730985065896504, "grad_norm": 5.708995603719906, "learning_rate": 8.1155475992387e-06, "loss": 17.0867, "step": 16812 }, { "epoch": 0.3073281298554116, "grad_norm": 6.614224500834429, "learning_rate": 8.115316072914292e-06, "loss": 17.563, "step": 16813 }, { "epoch": 0.30734640905185806, "grad_norm": 5.890866472730227, "learning_rate": 8.11508453567097e-06, "loss": 17.3403, "step": 16814 }, { "epoch": 0.3073646882483046, "grad_norm": 10.282356418660505, "learning_rate": 8.114852987509546e-06, "loss": 18.3379, "step": 16815 }, { "epoch": 0.30738296744475113, "grad_norm": 6.636957431138423, "learning_rate": 8.114621428430834e-06, "loss": 17.5146, "step": 16816 }, { "epoch": 0.30740124664119767, "grad_norm": 8.012350806778004, "learning_rate": 8.114389858435643e-06, "loss": 18.3006, "step": 16817 }, { "epoch": 0.3074195258376442, "grad_norm": 5.57934032195251, "learning_rate": 8.114158277524788e-06, "loss": 17.2041, "step": 16818 }, { "epoch": 0.3074378050340907, "grad_norm": 5.903979757135749, "learning_rate": 8.113926685699076e-06, "loss": 17.2361, "step": 16819 }, { "epoch": 0.3074560842305372, "grad_norm": 5.890769605037617, "learning_rate": 8.113695082959323e-06, "loss": 17.2154, "step": 16820 }, { "epoch": 0.30747436342698375, "grad_norm": 6.167782280137336, "learning_rate": 8.113463469306338e-06, "loss": 17.605, "step": 16821 }, { "epoch": 0.3074926426234303, "grad_norm": 6.041483161343982, "learning_rate": 8.113231844740934e-06, "loss": 17.276, "step": 16822 }, { "epoch": 0.3075109218198768, "grad_norm": 6.384100256896727, "learning_rate": 8.113000209263923e-06, "loss": 17.6484, "step": 16823 }, { "epoch": 0.3075292010163233, "grad_norm": 7.4010222801699, "learning_rate": 8.112768562876115e-06, "loss": 17.938, "step": 16824 }, { "epoch": 0.30754748021276984, "grad_norm": 5.227382045976373, "learning_rate": 8.112536905578324e-06, "loss": 17.0111, "step": 16825 }, { "epoch": 0.3075657594092164, "grad_norm": 6.248957610211808, "learning_rate": 8.112305237371363e-06, "loss": 17.6124, "step": 16826 }, { "epoch": 0.3075840386056629, "grad_norm": 8.505216408345236, "learning_rate": 8.11207355825604e-06, "loss": 18.6358, "step": 16827 }, { "epoch": 0.3076023178021094, "grad_norm": 5.564588254452631, "learning_rate": 8.111841868233169e-06, "loss": 17.1388, "step": 16828 }, { "epoch": 0.30762059699855593, "grad_norm": 6.53574604524088, "learning_rate": 8.111610167303564e-06, "loss": 17.4384, "step": 16829 }, { "epoch": 0.30763887619500246, "grad_norm": 5.688050011488321, "learning_rate": 8.111378455468033e-06, "loss": 17.2404, "step": 16830 }, { "epoch": 0.307657155391449, "grad_norm": 7.501892039648677, "learning_rate": 8.111146732727393e-06, "loss": 18.3043, "step": 16831 }, { "epoch": 0.30767543458789554, "grad_norm": 7.138462604852312, "learning_rate": 8.110914999082453e-06, "loss": 18.0898, "step": 16832 }, { "epoch": 0.307693713784342, "grad_norm": 7.377713723181878, "learning_rate": 8.110683254534026e-06, "loss": 18.0345, "step": 16833 }, { "epoch": 0.30771199298078855, "grad_norm": 6.676124166974646, "learning_rate": 8.110451499082923e-06, "loss": 17.518, "step": 16834 }, { "epoch": 0.3077302721772351, "grad_norm": 6.495347737747358, "learning_rate": 8.110219732729958e-06, "loss": 17.639, "step": 16835 }, { "epoch": 0.3077485513736816, "grad_norm": 7.532118493682625, "learning_rate": 8.109987955475943e-06, "loss": 18.0588, "step": 16836 }, { "epoch": 0.30776683057012816, "grad_norm": 6.460262745617369, "learning_rate": 8.10975616732169e-06, "loss": 17.4713, "step": 16837 }, { "epoch": 0.30778510976657464, "grad_norm": 6.988802108291433, "learning_rate": 8.109524368268011e-06, "loss": 17.7515, "step": 16838 }, { "epoch": 0.3078033889630212, "grad_norm": 7.09823848055091, "learning_rate": 8.10929255831572e-06, "loss": 17.3706, "step": 16839 }, { "epoch": 0.3078216681594677, "grad_norm": 6.16473466216064, "learning_rate": 8.109060737465628e-06, "loss": 17.6117, "step": 16840 }, { "epoch": 0.30783994735591425, "grad_norm": 6.333954307177859, "learning_rate": 8.108828905718547e-06, "loss": 17.2888, "step": 16841 }, { "epoch": 0.3078582265523608, "grad_norm": 7.885699569862376, "learning_rate": 8.10859706307529e-06, "loss": 18.4863, "step": 16842 }, { "epoch": 0.30787650574880726, "grad_norm": 6.5957527771697055, "learning_rate": 8.108365209536672e-06, "loss": 17.7022, "step": 16843 }, { "epoch": 0.3078947849452538, "grad_norm": 6.928343747044289, "learning_rate": 8.108133345103505e-06, "loss": 17.6217, "step": 16844 }, { "epoch": 0.30791306414170033, "grad_norm": 5.832568707890773, "learning_rate": 8.107901469776595e-06, "loss": 17.4022, "step": 16845 }, { "epoch": 0.30793134333814687, "grad_norm": 6.3452969762109435, "learning_rate": 8.107669583556763e-06, "loss": 17.3455, "step": 16846 }, { "epoch": 0.3079496225345934, "grad_norm": 7.294542601090386, "learning_rate": 8.10743768644482e-06, "loss": 18.1352, "step": 16847 }, { "epoch": 0.3079679017310399, "grad_norm": 6.319800452694817, "learning_rate": 8.107205778441576e-06, "loss": 17.7499, "step": 16848 }, { "epoch": 0.3079861809274864, "grad_norm": 5.88348935253275, "learning_rate": 8.106973859547847e-06, "loss": 17.2997, "step": 16849 }, { "epoch": 0.30800446012393295, "grad_norm": 6.2635753404745325, "learning_rate": 8.106741929764443e-06, "loss": 17.7991, "step": 16850 }, { "epoch": 0.3080227393203795, "grad_norm": 5.887526511443674, "learning_rate": 8.106509989092179e-06, "loss": 17.331, "step": 16851 }, { "epoch": 0.308041018516826, "grad_norm": 5.916620641359955, "learning_rate": 8.106278037531864e-06, "loss": 17.3204, "step": 16852 }, { "epoch": 0.3080592977132725, "grad_norm": 9.593246848857893, "learning_rate": 8.106046075084317e-06, "loss": 18.2151, "step": 16853 }, { "epoch": 0.30807757690971904, "grad_norm": 7.7937880274598745, "learning_rate": 8.105814101750349e-06, "loss": 17.7964, "step": 16854 }, { "epoch": 0.3080958561061656, "grad_norm": 6.027667395664885, "learning_rate": 8.10558211753077e-06, "loss": 17.1767, "step": 16855 }, { "epoch": 0.3081141353026121, "grad_norm": 6.311521404106981, "learning_rate": 8.105350122426393e-06, "loss": 17.51, "step": 16856 }, { "epoch": 0.30813241449905865, "grad_norm": 6.497565444424784, "learning_rate": 8.105118116438037e-06, "loss": 17.3181, "step": 16857 }, { "epoch": 0.30815069369550513, "grad_norm": 6.621864204055428, "learning_rate": 8.104886099566511e-06, "loss": 17.764, "step": 16858 }, { "epoch": 0.30816897289195166, "grad_norm": 7.942768946301585, "learning_rate": 8.104654071812629e-06, "loss": 17.9489, "step": 16859 }, { "epoch": 0.3081872520883982, "grad_norm": 7.0570350180681745, "learning_rate": 8.104422033177201e-06, "loss": 17.6731, "step": 16860 }, { "epoch": 0.30820553128484474, "grad_norm": 6.918981765212633, "learning_rate": 8.104189983661047e-06, "loss": 17.8462, "step": 16861 }, { "epoch": 0.3082238104812912, "grad_norm": 7.644666345858515, "learning_rate": 8.103957923264974e-06, "loss": 17.7347, "step": 16862 }, { "epoch": 0.30824208967773775, "grad_norm": 7.955988581314848, "learning_rate": 8.1037258519898e-06, "loss": 18.2606, "step": 16863 }, { "epoch": 0.3082603688741843, "grad_norm": 6.1781673866031745, "learning_rate": 8.103493769836332e-06, "loss": 17.3356, "step": 16864 }, { "epoch": 0.3082786480706308, "grad_norm": 8.316612046608348, "learning_rate": 8.103261676805392e-06, "loss": 18.5851, "step": 16865 }, { "epoch": 0.30829692726707736, "grad_norm": 7.471215637665413, "learning_rate": 8.103029572897787e-06, "loss": 18.0248, "step": 16866 }, { "epoch": 0.30831520646352384, "grad_norm": 5.831401656742476, "learning_rate": 8.102797458114332e-06, "loss": 17.2288, "step": 16867 }, { "epoch": 0.3083334856599704, "grad_norm": 6.277362015516548, "learning_rate": 8.102565332455843e-06, "loss": 17.3635, "step": 16868 }, { "epoch": 0.3083517648564169, "grad_norm": 7.113790971566835, "learning_rate": 8.102333195923131e-06, "loss": 17.7055, "step": 16869 }, { "epoch": 0.30837004405286345, "grad_norm": 6.126092734369032, "learning_rate": 8.10210104851701e-06, "loss": 17.5085, "step": 16870 }, { "epoch": 0.30838832324931, "grad_norm": 6.734911255508399, "learning_rate": 8.101868890238294e-06, "loss": 17.8684, "step": 16871 }, { "epoch": 0.30840660244575646, "grad_norm": 6.176690289964088, "learning_rate": 8.101636721087799e-06, "loss": 17.6733, "step": 16872 }, { "epoch": 0.308424881642203, "grad_norm": 6.466825704602649, "learning_rate": 8.101404541066331e-06, "loss": 18.0116, "step": 16873 }, { "epoch": 0.30844316083864953, "grad_norm": 8.540871578912435, "learning_rate": 8.101172350174713e-06, "loss": 18.262, "step": 16874 }, { "epoch": 0.30846144003509607, "grad_norm": 6.476487326471872, "learning_rate": 8.100940148413755e-06, "loss": 17.787, "step": 16875 }, { "epoch": 0.3084797192315426, "grad_norm": 6.543371046583851, "learning_rate": 8.100707935784271e-06, "loss": 17.5618, "step": 16876 }, { "epoch": 0.3084979984279891, "grad_norm": 5.191648147949043, "learning_rate": 8.100475712287074e-06, "loss": 17.1827, "step": 16877 }, { "epoch": 0.3085162776244356, "grad_norm": 8.132230026553794, "learning_rate": 8.10024347792298e-06, "loss": 18.4899, "step": 16878 }, { "epoch": 0.30853455682088216, "grad_norm": 7.1031659533241465, "learning_rate": 8.100011232692799e-06, "loss": 17.8651, "step": 16879 }, { "epoch": 0.3085528360173287, "grad_norm": 7.425041626183908, "learning_rate": 8.09977897659735e-06, "loss": 17.7215, "step": 16880 }, { "epoch": 0.3085711152137752, "grad_norm": 7.556925177089032, "learning_rate": 8.099546709637444e-06, "loss": 18.1119, "step": 16881 }, { "epoch": 0.3085893944102217, "grad_norm": 6.519700635836287, "learning_rate": 8.099314431813895e-06, "loss": 17.8108, "step": 16882 }, { "epoch": 0.30860767360666824, "grad_norm": 6.792229799249035, "learning_rate": 8.099082143127518e-06, "loss": 17.4976, "step": 16883 }, { "epoch": 0.3086259528031148, "grad_norm": 5.103482726645891, "learning_rate": 8.098849843579128e-06, "loss": 17.0982, "step": 16884 }, { "epoch": 0.3086442319995613, "grad_norm": 6.815483530135922, "learning_rate": 8.098617533169538e-06, "loss": 17.8734, "step": 16885 }, { "epoch": 0.30866251119600785, "grad_norm": 6.605615636643606, "learning_rate": 8.098385211899562e-06, "loss": 17.7568, "step": 16886 }, { "epoch": 0.30868079039245433, "grad_norm": 6.773876397373886, "learning_rate": 8.098152879770015e-06, "loss": 17.5579, "step": 16887 }, { "epoch": 0.30869906958890087, "grad_norm": 6.326966428213546, "learning_rate": 8.09792053678171e-06, "loss": 17.5555, "step": 16888 }, { "epoch": 0.3087173487853474, "grad_norm": 7.177618927533809, "learning_rate": 8.097688182935463e-06, "loss": 17.9881, "step": 16889 }, { "epoch": 0.30873562798179394, "grad_norm": 6.649670489629989, "learning_rate": 8.097455818232089e-06, "loss": 17.6745, "step": 16890 }, { "epoch": 0.30875390717824047, "grad_norm": 7.538466543305304, "learning_rate": 8.097223442672399e-06, "loss": 17.9156, "step": 16891 }, { "epoch": 0.30877218637468695, "grad_norm": 7.372745503381064, "learning_rate": 8.096991056257212e-06, "loss": 17.7921, "step": 16892 }, { "epoch": 0.3087904655711335, "grad_norm": 7.89383907410312, "learning_rate": 8.096758658987339e-06, "loss": 18.3288, "step": 16893 }, { "epoch": 0.30880874476758, "grad_norm": 6.242170963309946, "learning_rate": 8.096526250863594e-06, "loss": 17.5095, "step": 16894 }, { "epoch": 0.30882702396402656, "grad_norm": 6.301612936572135, "learning_rate": 8.096293831886795e-06, "loss": 17.3264, "step": 16895 }, { "epoch": 0.30884530316047304, "grad_norm": 5.8358432367779836, "learning_rate": 8.096061402057755e-06, "loss": 17.351, "step": 16896 }, { "epoch": 0.3088635823569196, "grad_norm": 5.0439347738848, "learning_rate": 8.095828961377287e-06, "loss": 16.9685, "step": 16897 }, { "epoch": 0.3088818615533661, "grad_norm": 5.497715837812876, "learning_rate": 8.095596509846209e-06, "loss": 17.1605, "step": 16898 }, { "epoch": 0.30890014074981265, "grad_norm": 6.384977825418099, "learning_rate": 8.095364047465333e-06, "loss": 17.379, "step": 16899 }, { "epoch": 0.3089184199462592, "grad_norm": 6.976100335057817, "learning_rate": 8.095131574235473e-06, "loss": 17.9314, "step": 16900 }, { "epoch": 0.30893669914270566, "grad_norm": 6.306299971912668, "learning_rate": 8.094899090157447e-06, "loss": 17.5288, "step": 16901 }, { "epoch": 0.3089549783391522, "grad_norm": 10.584265253117026, "learning_rate": 8.094666595232067e-06, "loss": 17.918, "step": 16902 }, { "epoch": 0.30897325753559873, "grad_norm": 5.874533561464786, "learning_rate": 8.094434089460152e-06, "loss": 17.2669, "step": 16903 }, { "epoch": 0.30899153673204527, "grad_norm": 6.055407560537636, "learning_rate": 8.094201572842511e-06, "loss": 16.9438, "step": 16904 }, { "epoch": 0.3090098159284918, "grad_norm": 7.248798402285025, "learning_rate": 8.093969045379964e-06, "loss": 18.0245, "step": 16905 }, { "epoch": 0.3090280951249383, "grad_norm": 6.966026641683622, "learning_rate": 8.093736507073325e-06, "loss": 17.3831, "step": 16906 }, { "epoch": 0.3090463743213848, "grad_norm": 7.450958004208825, "learning_rate": 8.093503957923404e-06, "loss": 18.1105, "step": 16907 }, { "epoch": 0.30906465351783136, "grad_norm": 6.392306557549609, "learning_rate": 8.093271397931022e-06, "loss": 17.6111, "step": 16908 }, { "epoch": 0.3090829327142779, "grad_norm": 5.670538844079594, "learning_rate": 8.093038827096993e-06, "loss": 17.0552, "step": 16909 }, { "epoch": 0.3091012119107244, "grad_norm": 7.512646870486651, "learning_rate": 8.092806245422131e-06, "loss": 17.7541, "step": 16910 }, { "epoch": 0.3091194911071709, "grad_norm": 6.903661762816213, "learning_rate": 8.092573652907252e-06, "loss": 17.8163, "step": 16911 }, { "epoch": 0.30913777030361744, "grad_norm": 7.138344336056069, "learning_rate": 8.092341049553168e-06, "loss": 17.9178, "step": 16912 }, { "epoch": 0.309156049500064, "grad_norm": 6.014316542066885, "learning_rate": 8.0921084353607e-06, "loss": 17.5349, "step": 16913 }, { "epoch": 0.3091743286965105, "grad_norm": 6.103595161617867, "learning_rate": 8.091875810330658e-06, "loss": 17.3808, "step": 16914 }, { "epoch": 0.30919260789295705, "grad_norm": 5.635502138542043, "learning_rate": 8.09164317446386e-06, "loss": 17.4035, "step": 16915 }, { "epoch": 0.30921088708940353, "grad_norm": 6.6603190080422365, "learning_rate": 8.091410527761123e-06, "loss": 17.4741, "step": 16916 }, { "epoch": 0.30922916628585007, "grad_norm": 7.082998085470822, "learning_rate": 8.09117787022326e-06, "loss": 17.6634, "step": 16917 }, { "epoch": 0.3092474454822966, "grad_norm": 7.799159186861457, "learning_rate": 8.090945201851086e-06, "loss": 18.1, "step": 16918 }, { "epoch": 0.30926572467874314, "grad_norm": 7.639306056104836, "learning_rate": 8.090712522645417e-06, "loss": 17.9958, "step": 16919 }, { "epoch": 0.3092840038751897, "grad_norm": 6.07550528023007, "learning_rate": 8.090479832607069e-06, "loss": 17.4038, "step": 16920 }, { "epoch": 0.30930228307163615, "grad_norm": 7.2802138718397345, "learning_rate": 8.090247131736857e-06, "loss": 18.0411, "step": 16921 }, { "epoch": 0.3093205622680827, "grad_norm": 7.460016928913056, "learning_rate": 8.090014420035597e-06, "loss": 18.155, "step": 16922 }, { "epoch": 0.3093388414645292, "grad_norm": 6.719145207687469, "learning_rate": 8.089781697504105e-06, "loss": 17.7742, "step": 16923 }, { "epoch": 0.30935712066097576, "grad_norm": 6.374124792853929, "learning_rate": 8.089548964143196e-06, "loss": 17.6679, "step": 16924 }, { "epoch": 0.3093753998574223, "grad_norm": 6.882657522479925, "learning_rate": 8.089316219953687e-06, "loss": 17.5267, "step": 16925 }, { "epoch": 0.3093936790538688, "grad_norm": 7.636590366002228, "learning_rate": 8.089083464936392e-06, "loss": 17.5369, "step": 16926 }, { "epoch": 0.3094119582503153, "grad_norm": 6.799639629794637, "learning_rate": 8.088850699092127e-06, "loss": 17.92, "step": 16927 }, { "epoch": 0.30943023744676185, "grad_norm": 6.764508225258617, "learning_rate": 8.08861792242171e-06, "loss": 17.5539, "step": 16928 }, { "epoch": 0.3094485166432084, "grad_norm": 7.029403941083494, "learning_rate": 8.088385134925953e-06, "loss": 17.6364, "step": 16929 }, { "epoch": 0.30946679583965486, "grad_norm": 6.558432469824841, "learning_rate": 8.088152336605674e-06, "loss": 17.4582, "step": 16930 }, { "epoch": 0.3094850750361014, "grad_norm": 6.7557714165906955, "learning_rate": 8.08791952746169e-06, "loss": 17.7139, "step": 16931 }, { "epoch": 0.30950335423254793, "grad_norm": 6.897953373824674, "learning_rate": 8.087686707494817e-06, "loss": 17.8887, "step": 16932 }, { "epoch": 0.30952163342899447, "grad_norm": 6.63615805785581, "learning_rate": 8.087453876705868e-06, "loss": 17.7007, "step": 16933 }, { "epoch": 0.309539912625441, "grad_norm": 6.515605327772047, "learning_rate": 8.087221035095662e-06, "loss": 17.4209, "step": 16934 }, { "epoch": 0.3095581918218875, "grad_norm": 5.315636922119088, "learning_rate": 8.086988182665016e-06, "loss": 16.9972, "step": 16935 }, { "epoch": 0.309576471018334, "grad_norm": 6.8677499121840775, "learning_rate": 8.086755319414743e-06, "loss": 17.692, "step": 16936 }, { "epoch": 0.30959475021478056, "grad_norm": 7.371046058676606, "learning_rate": 8.08652244534566e-06, "loss": 18.0294, "step": 16937 }, { "epoch": 0.3096130294112271, "grad_norm": 7.992511094900772, "learning_rate": 8.086289560458583e-06, "loss": 17.8508, "step": 16938 }, { "epoch": 0.3096313086076736, "grad_norm": 6.380923803072536, "learning_rate": 8.086056664754328e-06, "loss": 17.919, "step": 16939 }, { "epoch": 0.3096495878041201, "grad_norm": 6.475349880439513, "learning_rate": 8.085823758233716e-06, "loss": 17.7402, "step": 16940 }, { "epoch": 0.30966786700056664, "grad_norm": 6.118767857981034, "learning_rate": 8.085590840897558e-06, "loss": 17.4519, "step": 16941 }, { "epoch": 0.3096861461970132, "grad_norm": 6.533876579597839, "learning_rate": 8.08535791274667e-06, "loss": 17.2101, "step": 16942 }, { "epoch": 0.3097044253934597, "grad_norm": 6.731862506967125, "learning_rate": 8.085124973781872e-06, "loss": 17.3575, "step": 16943 }, { "epoch": 0.30972270458990625, "grad_norm": 7.39391206942118, "learning_rate": 8.084892024003978e-06, "loss": 17.8833, "step": 16944 }, { "epoch": 0.30974098378635273, "grad_norm": 6.661848016390472, "learning_rate": 8.084659063413805e-06, "loss": 17.6427, "step": 16945 }, { "epoch": 0.30975926298279927, "grad_norm": 6.019351705212323, "learning_rate": 8.08442609201217e-06, "loss": 17.5766, "step": 16946 }, { "epoch": 0.3097775421792458, "grad_norm": 6.303805987317659, "learning_rate": 8.084193109799889e-06, "loss": 17.7708, "step": 16947 }, { "epoch": 0.30979582137569234, "grad_norm": 8.651229911106629, "learning_rate": 8.08396011677778e-06, "loss": 18.3284, "step": 16948 }, { "epoch": 0.3098141005721389, "grad_norm": 7.170146505652532, "learning_rate": 8.083727112946657e-06, "loss": 17.5075, "step": 16949 }, { "epoch": 0.30983237976858535, "grad_norm": 5.813648845249317, "learning_rate": 8.083494098307338e-06, "loss": 17.4887, "step": 16950 }, { "epoch": 0.3098506589650319, "grad_norm": 5.079115814545762, "learning_rate": 8.08326107286064e-06, "loss": 17.0961, "step": 16951 }, { "epoch": 0.3098689381614784, "grad_norm": 6.967427799136651, "learning_rate": 8.08302803660738e-06, "loss": 17.8371, "step": 16952 }, { "epoch": 0.30988721735792496, "grad_norm": 5.887776100616813, "learning_rate": 8.082794989548372e-06, "loss": 17.3094, "step": 16953 }, { "epoch": 0.3099054965543715, "grad_norm": 5.590935783414069, "learning_rate": 8.08256193168444e-06, "loss": 17.3391, "step": 16954 }, { "epoch": 0.309923775750818, "grad_norm": 5.9681028865587935, "learning_rate": 8.082328863016392e-06, "loss": 17.4024, "step": 16955 }, { "epoch": 0.3099420549472645, "grad_norm": 7.224395964372872, "learning_rate": 8.08209578354505e-06, "loss": 17.9612, "step": 16956 }, { "epoch": 0.30996033414371105, "grad_norm": 6.998488998901023, "learning_rate": 8.081862693271228e-06, "loss": 17.6941, "step": 16957 }, { "epoch": 0.3099786133401576, "grad_norm": 6.834681750089619, "learning_rate": 8.081629592195748e-06, "loss": 17.6734, "step": 16958 }, { "epoch": 0.3099968925366041, "grad_norm": 6.348617706162432, "learning_rate": 8.08139648031942e-06, "loss": 17.6922, "step": 16959 }, { "epoch": 0.3100151717330506, "grad_norm": 6.926850762857286, "learning_rate": 8.081163357643067e-06, "loss": 17.7738, "step": 16960 }, { "epoch": 0.31003345092949713, "grad_norm": 5.575484399543014, "learning_rate": 8.080930224167505e-06, "loss": 17.1479, "step": 16961 }, { "epoch": 0.31005173012594367, "grad_norm": 6.297593836898856, "learning_rate": 8.080697079893547e-06, "loss": 17.2397, "step": 16962 }, { "epoch": 0.3100700093223902, "grad_norm": 5.373837482153073, "learning_rate": 8.080463924822016e-06, "loss": 17.2421, "step": 16963 }, { "epoch": 0.3100882885188367, "grad_norm": 6.251755688410515, "learning_rate": 8.080230758953725e-06, "loss": 17.6207, "step": 16964 }, { "epoch": 0.3101065677152832, "grad_norm": 5.887693642675731, "learning_rate": 8.07999758228949e-06, "loss": 17.246, "step": 16965 }, { "epoch": 0.31012484691172976, "grad_norm": 6.622014302673911, "learning_rate": 8.079764394830132e-06, "loss": 17.7236, "step": 16966 }, { "epoch": 0.3101431261081763, "grad_norm": 6.26840513401978, "learning_rate": 8.079531196576468e-06, "loss": 17.4721, "step": 16967 }, { "epoch": 0.31016140530462283, "grad_norm": 7.617899115028561, "learning_rate": 8.079297987529315e-06, "loss": 18.0188, "step": 16968 }, { "epoch": 0.3101796845010693, "grad_norm": 6.255883472661476, "learning_rate": 8.079064767689489e-06, "loss": 17.4223, "step": 16969 }, { "epoch": 0.31019796369751584, "grad_norm": 5.44702046069408, "learning_rate": 8.078831537057809e-06, "loss": 17.2657, "step": 16970 }, { "epoch": 0.3102162428939624, "grad_norm": 6.8017262707599375, "learning_rate": 8.07859829563509e-06, "loss": 17.8041, "step": 16971 }, { "epoch": 0.3102345220904089, "grad_norm": 7.582731197635068, "learning_rate": 8.078365043422153e-06, "loss": 17.7339, "step": 16972 }, { "epoch": 0.31025280128685545, "grad_norm": 6.277249513335438, "learning_rate": 8.078131780419811e-06, "loss": 17.3556, "step": 16973 }, { "epoch": 0.31027108048330193, "grad_norm": 6.069702885550678, "learning_rate": 8.077898506628887e-06, "loss": 17.4744, "step": 16974 }, { "epoch": 0.31028935967974847, "grad_norm": 6.704836121399122, "learning_rate": 8.077665222050195e-06, "loss": 17.6548, "step": 16975 }, { "epoch": 0.310307638876195, "grad_norm": 6.304636426754897, "learning_rate": 8.077431926684552e-06, "loss": 17.4647, "step": 16976 }, { "epoch": 0.31032591807264154, "grad_norm": 6.967132063925525, "learning_rate": 8.077198620532779e-06, "loss": 17.773, "step": 16977 }, { "epoch": 0.3103441972690881, "grad_norm": 7.629948796757115, "learning_rate": 8.076965303595692e-06, "loss": 17.9232, "step": 16978 }, { "epoch": 0.31036247646553455, "grad_norm": 7.415482202614117, "learning_rate": 8.076731975874107e-06, "loss": 18.1143, "step": 16979 }, { "epoch": 0.3103807556619811, "grad_norm": 6.065671198356489, "learning_rate": 8.076498637368844e-06, "loss": 17.5238, "step": 16980 }, { "epoch": 0.3103990348584276, "grad_norm": 6.143485099025842, "learning_rate": 8.07626528808072e-06, "loss": 17.4993, "step": 16981 }, { "epoch": 0.31041731405487416, "grad_norm": 6.107688265313014, "learning_rate": 8.076031928010554e-06, "loss": 17.4624, "step": 16982 }, { "epoch": 0.3104355932513207, "grad_norm": 9.391764640027931, "learning_rate": 8.075798557159163e-06, "loss": 18.4911, "step": 16983 }, { "epoch": 0.3104538724477672, "grad_norm": 6.056483632792479, "learning_rate": 8.075565175527365e-06, "loss": 17.4349, "step": 16984 }, { "epoch": 0.3104721516442137, "grad_norm": 5.903959445175416, "learning_rate": 8.075331783115977e-06, "loss": 17.3011, "step": 16985 }, { "epoch": 0.31049043084066025, "grad_norm": 5.315413721020948, "learning_rate": 8.075098379925818e-06, "loss": 17.0139, "step": 16986 }, { "epoch": 0.3105087100371068, "grad_norm": 7.277900017474505, "learning_rate": 8.074864965957706e-06, "loss": 17.9004, "step": 16987 }, { "epoch": 0.3105269892335533, "grad_norm": 6.132393360020543, "learning_rate": 8.07463154121246e-06, "loss": 17.6193, "step": 16988 }, { "epoch": 0.3105452684299998, "grad_norm": 5.281859849348049, "learning_rate": 8.074398105690897e-06, "loss": 17.1315, "step": 16989 }, { "epoch": 0.31056354762644633, "grad_norm": 7.063814869684254, "learning_rate": 8.074164659393834e-06, "loss": 17.4937, "step": 16990 }, { "epoch": 0.31058182682289287, "grad_norm": 6.327726563585208, "learning_rate": 8.073931202322092e-06, "loss": 17.6545, "step": 16991 }, { "epoch": 0.3106001060193394, "grad_norm": 6.2286458775949285, "learning_rate": 8.073697734476489e-06, "loss": 17.3014, "step": 16992 }, { "epoch": 0.31061838521578594, "grad_norm": 6.557859461936198, "learning_rate": 8.07346425585784e-06, "loss": 17.4907, "step": 16993 }, { "epoch": 0.3106366644122324, "grad_norm": 6.85632582671068, "learning_rate": 8.073230766466966e-06, "loss": 17.8222, "step": 16994 }, { "epoch": 0.31065494360867896, "grad_norm": 7.178275517134839, "learning_rate": 8.072997266304686e-06, "loss": 17.7144, "step": 16995 }, { "epoch": 0.3106732228051255, "grad_norm": 5.980530187088951, "learning_rate": 8.072763755371816e-06, "loss": 17.7154, "step": 16996 }, { "epoch": 0.31069150200157203, "grad_norm": 6.507905260479088, "learning_rate": 8.072530233669176e-06, "loss": 17.8949, "step": 16997 }, { "epoch": 0.3107097811980185, "grad_norm": 7.817112705747621, "learning_rate": 8.072296701197584e-06, "loss": 18.3349, "step": 16998 }, { "epoch": 0.31072806039446504, "grad_norm": 6.768726074333093, "learning_rate": 8.07206315795786e-06, "loss": 18.1172, "step": 16999 }, { "epoch": 0.3107463395909116, "grad_norm": 7.226745209650806, "learning_rate": 8.071829603950821e-06, "loss": 18.185, "step": 17000 }, { "epoch": 0.3107646187873581, "grad_norm": 5.905469271144069, "learning_rate": 8.071596039177284e-06, "loss": 17.3604, "step": 17001 }, { "epoch": 0.31078289798380465, "grad_norm": 6.731758686543261, "learning_rate": 8.071362463638071e-06, "loss": 17.4995, "step": 17002 }, { "epoch": 0.31080117718025113, "grad_norm": 7.016546984059646, "learning_rate": 8.071128877333999e-06, "loss": 17.7842, "step": 17003 }, { "epoch": 0.31081945637669767, "grad_norm": 6.972725771772177, "learning_rate": 8.070895280265884e-06, "loss": 18.0233, "step": 17004 }, { "epoch": 0.3108377355731442, "grad_norm": 6.619120149588867, "learning_rate": 8.07066167243455e-06, "loss": 17.742, "step": 17005 }, { "epoch": 0.31085601476959074, "grad_norm": 7.179735912054314, "learning_rate": 8.070428053840816e-06, "loss": 17.9021, "step": 17006 }, { "epoch": 0.3108742939660373, "grad_norm": 6.547854175047619, "learning_rate": 8.070194424485494e-06, "loss": 17.6715, "step": 17007 }, { "epoch": 0.31089257316248375, "grad_norm": 6.141519537589516, "learning_rate": 8.069960784369407e-06, "loss": 17.4044, "step": 17008 }, { "epoch": 0.3109108523589303, "grad_norm": 6.107340651125197, "learning_rate": 8.069727133493376e-06, "loss": 17.4647, "step": 17009 }, { "epoch": 0.3109291315553768, "grad_norm": 6.769323403249451, "learning_rate": 8.069493471858216e-06, "loss": 17.6992, "step": 17010 }, { "epoch": 0.31094741075182336, "grad_norm": 5.643737975840195, "learning_rate": 8.06925979946475e-06, "loss": 17.2691, "step": 17011 }, { "epoch": 0.3109656899482699, "grad_norm": 6.782297245436195, "learning_rate": 8.069026116313791e-06, "loss": 17.8015, "step": 17012 }, { "epoch": 0.3109839691447164, "grad_norm": 6.4714854196737175, "learning_rate": 8.068792422406167e-06, "loss": 17.439, "step": 17013 }, { "epoch": 0.3110022483411629, "grad_norm": 6.747759203050445, "learning_rate": 8.068558717742688e-06, "loss": 17.7828, "step": 17014 }, { "epoch": 0.31102052753760945, "grad_norm": 7.009226825907778, "learning_rate": 8.068325002324177e-06, "loss": 17.5213, "step": 17015 }, { "epoch": 0.311038806734056, "grad_norm": 5.484043950856844, "learning_rate": 8.068091276151454e-06, "loss": 17.116, "step": 17016 }, { "epoch": 0.3110570859305025, "grad_norm": 6.943562688903507, "learning_rate": 8.067857539225338e-06, "loss": 17.909, "step": 17017 }, { "epoch": 0.311075365126949, "grad_norm": 7.288566353969405, "learning_rate": 8.067623791546646e-06, "loss": 17.8718, "step": 17018 }, { "epoch": 0.31109364432339554, "grad_norm": 8.31020159525137, "learning_rate": 8.0673900331162e-06, "loss": 18.0491, "step": 17019 }, { "epoch": 0.31111192351984207, "grad_norm": 6.679492595636824, "learning_rate": 8.067156263934818e-06, "loss": 17.6752, "step": 17020 }, { "epoch": 0.3111302027162886, "grad_norm": 6.486200578548156, "learning_rate": 8.066922484003319e-06, "loss": 17.8335, "step": 17021 }, { "epoch": 0.31114848191273514, "grad_norm": 6.565739699048459, "learning_rate": 8.066688693322523e-06, "loss": 17.5224, "step": 17022 }, { "epoch": 0.3111667611091816, "grad_norm": 6.113042705635595, "learning_rate": 8.06645489189325e-06, "loss": 17.7832, "step": 17023 }, { "epoch": 0.31118504030562816, "grad_norm": 6.835543180942098, "learning_rate": 8.066221079716317e-06, "loss": 17.4553, "step": 17024 }, { "epoch": 0.3112033195020747, "grad_norm": 5.97030877167702, "learning_rate": 8.065987256792547e-06, "loss": 17.1874, "step": 17025 }, { "epoch": 0.31122159869852123, "grad_norm": 6.18769337467142, "learning_rate": 8.065753423122755e-06, "loss": 17.3374, "step": 17026 }, { "epoch": 0.31123987789496776, "grad_norm": 6.025111695620645, "learning_rate": 8.065519578707766e-06, "loss": 17.1555, "step": 17027 }, { "epoch": 0.31125815709141424, "grad_norm": 6.695901983329723, "learning_rate": 8.065285723548398e-06, "loss": 17.8727, "step": 17028 }, { "epoch": 0.3112764362878608, "grad_norm": 8.023883962901643, "learning_rate": 8.065051857645466e-06, "loss": 18.1035, "step": 17029 }, { "epoch": 0.3112947154843073, "grad_norm": 6.898508469798952, "learning_rate": 8.064817980999794e-06, "loss": 17.9484, "step": 17030 }, { "epoch": 0.31131299468075385, "grad_norm": 6.922422156492689, "learning_rate": 8.064584093612203e-06, "loss": 17.7561, "step": 17031 }, { "epoch": 0.31133127387720033, "grad_norm": 6.895024630554713, "learning_rate": 8.064350195483509e-06, "loss": 17.9243, "step": 17032 }, { "epoch": 0.31134955307364687, "grad_norm": 9.128823275667488, "learning_rate": 8.064116286614535e-06, "loss": 18.3941, "step": 17033 }, { "epoch": 0.3113678322700934, "grad_norm": 6.110840694777033, "learning_rate": 8.063882367006098e-06, "loss": 17.4976, "step": 17034 }, { "epoch": 0.31138611146653994, "grad_norm": 6.416235495914686, "learning_rate": 8.06364843665902e-06, "loss": 17.8703, "step": 17035 }, { "epoch": 0.3114043906629865, "grad_norm": 6.764230592310325, "learning_rate": 8.063414495574118e-06, "loss": 17.8869, "step": 17036 }, { "epoch": 0.31142266985943295, "grad_norm": 6.669468797976102, "learning_rate": 8.063180543752216e-06, "loss": 17.8181, "step": 17037 }, { "epoch": 0.3114409490558795, "grad_norm": 6.2584630470038904, "learning_rate": 8.062946581194131e-06, "loss": 17.4431, "step": 17038 }, { "epoch": 0.311459228252326, "grad_norm": 9.804785148077373, "learning_rate": 8.062712607900685e-06, "loss": 18.5277, "step": 17039 }, { "epoch": 0.31147750744877256, "grad_norm": 7.235891141499928, "learning_rate": 8.062478623872698e-06, "loss": 17.916, "step": 17040 }, { "epoch": 0.3114957866452191, "grad_norm": 7.355908158041542, "learning_rate": 8.062244629110986e-06, "loss": 17.6022, "step": 17041 }, { "epoch": 0.3115140658416656, "grad_norm": 7.182154954017314, "learning_rate": 8.062010623616375e-06, "loss": 17.7456, "step": 17042 }, { "epoch": 0.3115323450381121, "grad_norm": 10.286129801372665, "learning_rate": 8.06177660738968e-06, "loss": 17.9494, "step": 17043 }, { "epoch": 0.31155062423455865, "grad_norm": 6.156373082601276, "learning_rate": 8.061542580431726e-06, "loss": 17.349, "step": 17044 }, { "epoch": 0.3115689034310052, "grad_norm": 6.827769349808465, "learning_rate": 8.06130854274333e-06, "loss": 17.6985, "step": 17045 }, { "epoch": 0.3115871826274517, "grad_norm": 7.673966913137476, "learning_rate": 8.061074494325315e-06, "loss": 17.7679, "step": 17046 }, { "epoch": 0.3116054618238982, "grad_norm": 9.078005760136998, "learning_rate": 8.060840435178498e-06, "loss": 18.4449, "step": 17047 }, { "epoch": 0.31162374102034474, "grad_norm": 5.808936845800925, "learning_rate": 8.0606063653037e-06, "loss": 17.1379, "step": 17048 }, { "epoch": 0.31164202021679127, "grad_norm": 10.06711807599597, "learning_rate": 8.060372284701743e-06, "loss": 18.286, "step": 17049 }, { "epoch": 0.3116602994132378, "grad_norm": 5.761124065838537, "learning_rate": 8.060138193373446e-06, "loss": 17.0214, "step": 17050 }, { "epoch": 0.31167857860968434, "grad_norm": 6.014642946448462, "learning_rate": 8.059904091319633e-06, "loss": 17.4176, "step": 17051 }, { "epoch": 0.3116968578061308, "grad_norm": 5.110580803887255, "learning_rate": 8.059669978541118e-06, "loss": 16.9478, "step": 17052 }, { "epoch": 0.31171513700257736, "grad_norm": 6.358176665756766, "learning_rate": 8.059435855038727e-06, "loss": 17.3045, "step": 17053 }, { "epoch": 0.3117334161990239, "grad_norm": 6.821562150145217, "learning_rate": 8.05920172081328e-06, "loss": 17.5357, "step": 17054 }, { "epoch": 0.31175169539547043, "grad_norm": 7.502017033052161, "learning_rate": 8.058967575865593e-06, "loss": 17.9821, "step": 17055 }, { "epoch": 0.31176997459191697, "grad_norm": 6.404123994690587, "learning_rate": 8.058733420196492e-06, "loss": 17.3785, "step": 17056 }, { "epoch": 0.31178825378836345, "grad_norm": 5.824909042286776, "learning_rate": 8.058499253806797e-06, "loss": 17.0588, "step": 17057 }, { "epoch": 0.31180653298481, "grad_norm": 8.893768353879862, "learning_rate": 8.058265076697327e-06, "loss": 18.3984, "step": 17058 }, { "epoch": 0.3118248121812565, "grad_norm": 8.36422672697942, "learning_rate": 8.058030888868902e-06, "loss": 18.1194, "step": 17059 }, { "epoch": 0.31184309137770305, "grad_norm": 7.707476457887245, "learning_rate": 8.057796690322345e-06, "loss": 18.1633, "step": 17060 }, { "epoch": 0.3118613705741496, "grad_norm": 5.794932126433034, "learning_rate": 8.057562481058476e-06, "loss": 17.2252, "step": 17061 }, { "epoch": 0.31187964977059607, "grad_norm": 6.579128480537068, "learning_rate": 8.057328261078116e-06, "loss": 17.6532, "step": 17062 }, { "epoch": 0.3118979289670426, "grad_norm": 5.953515237473084, "learning_rate": 8.057094030382084e-06, "loss": 17.3097, "step": 17063 }, { "epoch": 0.31191620816348914, "grad_norm": 7.242998926533677, "learning_rate": 8.056859788971206e-06, "loss": 17.637, "step": 17064 }, { "epoch": 0.3119344873599357, "grad_norm": 6.669121683718374, "learning_rate": 8.056625536846297e-06, "loss": 17.7112, "step": 17065 }, { "epoch": 0.31195276655638216, "grad_norm": 6.234045316484745, "learning_rate": 8.056391274008182e-06, "loss": 17.5276, "step": 17066 }, { "epoch": 0.3119710457528287, "grad_norm": 5.99918087491853, "learning_rate": 8.05615700045768e-06, "loss": 17.3289, "step": 17067 }, { "epoch": 0.3119893249492752, "grad_norm": 7.395409281321343, "learning_rate": 8.055922716195614e-06, "loss": 17.9555, "step": 17068 }, { "epoch": 0.31200760414572176, "grad_norm": 6.560024466253384, "learning_rate": 8.055688421222802e-06, "loss": 17.689, "step": 17069 }, { "epoch": 0.3120258833421683, "grad_norm": 6.8100888844054595, "learning_rate": 8.05545411554007e-06, "loss": 17.4748, "step": 17070 }, { "epoch": 0.3120441625386148, "grad_norm": 6.8086225586593345, "learning_rate": 8.055219799148236e-06, "loss": 17.5875, "step": 17071 }, { "epoch": 0.3120624417350613, "grad_norm": 7.076630506537384, "learning_rate": 8.05498547204812e-06, "loss": 17.9189, "step": 17072 }, { "epoch": 0.31208072093150785, "grad_norm": 7.134796225580065, "learning_rate": 8.054751134240545e-06, "loss": 17.9532, "step": 17073 }, { "epoch": 0.3120990001279544, "grad_norm": 6.903890762980463, "learning_rate": 8.054516785726333e-06, "loss": 17.9626, "step": 17074 }, { "epoch": 0.3121172793244009, "grad_norm": 6.892699558993286, "learning_rate": 8.054282426506306e-06, "loss": 17.8961, "step": 17075 }, { "epoch": 0.3121355585208474, "grad_norm": 6.06007985064659, "learning_rate": 8.054048056581283e-06, "loss": 17.4805, "step": 17076 }, { "epoch": 0.31215383771729394, "grad_norm": 6.5185241906590194, "learning_rate": 8.053813675952085e-06, "loss": 17.4921, "step": 17077 }, { "epoch": 0.31217211691374047, "grad_norm": 7.249273040241578, "learning_rate": 8.053579284619538e-06, "loss": 17.394, "step": 17078 }, { "epoch": 0.312190396110187, "grad_norm": 6.445044732571578, "learning_rate": 8.05334488258446e-06, "loss": 17.4595, "step": 17079 }, { "epoch": 0.31220867530663354, "grad_norm": 6.187669512889358, "learning_rate": 8.053110469847671e-06, "loss": 17.4792, "step": 17080 }, { "epoch": 0.31222695450308, "grad_norm": 6.122343555267942, "learning_rate": 8.052876046409997e-06, "loss": 17.619, "step": 17081 }, { "epoch": 0.31224523369952656, "grad_norm": 6.058224000590358, "learning_rate": 8.052641612272255e-06, "loss": 17.3695, "step": 17082 }, { "epoch": 0.3122635128959731, "grad_norm": 6.11590720237535, "learning_rate": 8.052407167435271e-06, "loss": 17.5494, "step": 17083 }, { "epoch": 0.31228179209241963, "grad_norm": 5.560146633478227, "learning_rate": 8.052172711899864e-06, "loss": 17.3819, "step": 17084 }, { "epoch": 0.31230007128886617, "grad_norm": 5.705357822499922, "learning_rate": 8.051938245666857e-06, "loss": 17.4352, "step": 17085 }, { "epoch": 0.31231835048531265, "grad_norm": 5.918455183105127, "learning_rate": 8.051703768737072e-06, "loss": 17.2833, "step": 17086 }, { "epoch": 0.3123366296817592, "grad_norm": 7.623831503881604, "learning_rate": 8.051469281111329e-06, "loss": 18.0024, "step": 17087 }, { "epoch": 0.3123549088782057, "grad_norm": 7.182707774727692, "learning_rate": 8.05123478279045e-06, "loss": 17.6483, "step": 17088 }, { "epoch": 0.31237318807465225, "grad_norm": 8.123794902176977, "learning_rate": 8.05100027377526e-06, "loss": 18.1358, "step": 17089 }, { "epoch": 0.3123914672710988, "grad_norm": 7.305505632125513, "learning_rate": 8.050765754066577e-06, "loss": 17.5472, "step": 17090 }, { "epoch": 0.31240974646754527, "grad_norm": 10.310046563194174, "learning_rate": 8.050531223665226e-06, "loss": 18.3201, "step": 17091 }, { "epoch": 0.3124280256639918, "grad_norm": 7.293663182369382, "learning_rate": 8.050296682572028e-06, "loss": 17.6776, "step": 17092 }, { "epoch": 0.31244630486043834, "grad_norm": 7.711378309642685, "learning_rate": 8.050062130787803e-06, "loss": 18.0298, "step": 17093 }, { "epoch": 0.3124645840568849, "grad_norm": 8.998707061510684, "learning_rate": 8.049827568313377e-06, "loss": 18.6374, "step": 17094 }, { "epoch": 0.3124828632533314, "grad_norm": 6.572040902501415, "learning_rate": 8.049592995149568e-06, "loss": 17.5308, "step": 17095 }, { "epoch": 0.3125011424497779, "grad_norm": 7.654196776411626, "learning_rate": 8.049358411297203e-06, "loss": 18.1515, "step": 17096 }, { "epoch": 0.3125194216462244, "grad_norm": 5.888627620214243, "learning_rate": 8.049123816757098e-06, "loss": 17.5023, "step": 17097 }, { "epoch": 0.31253770084267096, "grad_norm": 5.840214153242996, "learning_rate": 8.04888921153008e-06, "loss": 17.5331, "step": 17098 }, { "epoch": 0.3125559800391175, "grad_norm": 6.845758804187963, "learning_rate": 8.048654595616972e-06, "loss": 17.4805, "step": 17099 }, { "epoch": 0.312574259235564, "grad_norm": 6.997364797480822, "learning_rate": 8.04841996901859e-06, "loss": 18.0936, "step": 17100 }, { "epoch": 0.3125925384320105, "grad_norm": 7.516464323446667, "learning_rate": 8.048185331735764e-06, "loss": 17.8479, "step": 17101 }, { "epoch": 0.31261081762845705, "grad_norm": 7.688519225306918, "learning_rate": 8.047950683769312e-06, "loss": 17.6508, "step": 17102 }, { "epoch": 0.3126290968249036, "grad_norm": 6.727504229055415, "learning_rate": 8.047716025120058e-06, "loss": 17.6344, "step": 17103 }, { "epoch": 0.3126473760213501, "grad_norm": 6.331388539219552, "learning_rate": 8.047481355788822e-06, "loss": 17.6653, "step": 17104 }, { "epoch": 0.3126656552177966, "grad_norm": 6.8055450743979415, "learning_rate": 8.047246675776428e-06, "loss": 17.6641, "step": 17105 }, { "epoch": 0.31268393441424314, "grad_norm": 7.724344148380758, "learning_rate": 8.047011985083701e-06, "loss": 18.4167, "step": 17106 }, { "epoch": 0.31270221361068967, "grad_norm": 6.457607376909295, "learning_rate": 8.04677728371146e-06, "loss": 17.8015, "step": 17107 }, { "epoch": 0.3127204928071362, "grad_norm": 10.292345069602899, "learning_rate": 8.04654257166053e-06, "loss": 18.1682, "step": 17108 }, { "epoch": 0.31273877200358274, "grad_norm": 7.2397095021676385, "learning_rate": 8.046307848931733e-06, "loss": 17.7562, "step": 17109 }, { "epoch": 0.3127570512000292, "grad_norm": 7.833608664386632, "learning_rate": 8.04607311552589e-06, "loss": 17.8266, "step": 17110 }, { "epoch": 0.31277533039647576, "grad_norm": 9.656642046194163, "learning_rate": 8.045838371443826e-06, "loss": 17.5523, "step": 17111 }, { "epoch": 0.3127936095929223, "grad_norm": 7.2585431274803085, "learning_rate": 8.045603616686362e-06, "loss": 17.7699, "step": 17112 }, { "epoch": 0.31281188878936883, "grad_norm": 6.917059958259703, "learning_rate": 8.045368851254322e-06, "loss": 17.547, "step": 17113 }, { "epoch": 0.31283016798581537, "grad_norm": 7.219994130213293, "learning_rate": 8.04513407514853e-06, "loss": 17.5624, "step": 17114 }, { "epoch": 0.31284844718226185, "grad_norm": 9.500891634754796, "learning_rate": 8.044899288369804e-06, "loss": 18.6027, "step": 17115 }, { "epoch": 0.3128667263787084, "grad_norm": 7.287378731024665, "learning_rate": 8.044664490918972e-06, "loss": 17.9434, "step": 17116 }, { "epoch": 0.3128850055751549, "grad_norm": 7.479277181987813, "learning_rate": 8.044429682796855e-06, "loss": 18.0854, "step": 17117 }, { "epoch": 0.31290328477160145, "grad_norm": 5.413721045579754, "learning_rate": 8.044194864004276e-06, "loss": 17.1874, "step": 17118 }, { "epoch": 0.312921563968048, "grad_norm": 8.020478219000237, "learning_rate": 8.043960034542058e-06, "loss": 18.4478, "step": 17119 }, { "epoch": 0.31293984316449447, "grad_norm": 6.171454756272949, "learning_rate": 8.043725194411025e-06, "loss": 17.6453, "step": 17120 }, { "epoch": 0.312958122360941, "grad_norm": 7.381828581592629, "learning_rate": 8.043490343612e-06, "loss": 17.5644, "step": 17121 }, { "epoch": 0.31297640155738754, "grad_norm": 5.4404574332533775, "learning_rate": 8.043255482145804e-06, "loss": 17.1108, "step": 17122 }, { "epoch": 0.3129946807538341, "grad_norm": 8.279403270053606, "learning_rate": 8.043020610013261e-06, "loss": 18.4835, "step": 17123 }, { "epoch": 0.3130129599502806, "grad_norm": 6.7496167025061675, "learning_rate": 8.042785727215196e-06, "loss": 17.4873, "step": 17124 }, { "epoch": 0.3130312391467271, "grad_norm": 6.016379324428997, "learning_rate": 8.042550833752431e-06, "loss": 17.5257, "step": 17125 }, { "epoch": 0.3130495183431736, "grad_norm": 5.828238094728282, "learning_rate": 8.042315929625789e-06, "loss": 17.3178, "step": 17126 }, { "epoch": 0.31306779753962016, "grad_norm": 6.058292426814321, "learning_rate": 8.042081014836094e-06, "loss": 17.5104, "step": 17127 }, { "epoch": 0.3130860767360667, "grad_norm": 5.795678889764795, "learning_rate": 8.041846089384169e-06, "loss": 17.2683, "step": 17128 }, { "epoch": 0.31310435593251323, "grad_norm": 6.838414188100603, "learning_rate": 8.041611153270837e-06, "loss": 17.5137, "step": 17129 }, { "epoch": 0.3131226351289597, "grad_norm": 7.508957516597827, "learning_rate": 8.041376206496922e-06, "loss": 18.0325, "step": 17130 }, { "epoch": 0.31314091432540625, "grad_norm": 5.947618787572319, "learning_rate": 8.041141249063249e-06, "loss": 17.4239, "step": 17131 }, { "epoch": 0.3131591935218528, "grad_norm": 6.988214297935935, "learning_rate": 8.040906280970637e-06, "loss": 17.8497, "step": 17132 }, { "epoch": 0.3131774727182993, "grad_norm": 7.0904501986832065, "learning_rate": 8.040671302219915e-06, "loss": 17.9897, "step": 17133 }, { "epoch": 0.3131957519147458, "grad_norm": 7.551212912243727, "learning_rate": 8.040436312811902e-06, "loss": 18.2153, "step": 17134 }, { "epoch": 0.31321403111119234, "grad_norm": 7.66120681501316, "learning_rate": 8.040201312747425e-06, "loss": 17.5864, "step": 17135 }, { "epoch": 0.3132323103076389, "grad_norm": 6.689147304740825, "learning_rate": 8.039966302027305e-06, "loss": 17.8764, "step": 17136 }, { "epoch": 0.3132505895040854, "grad_norm": 6.95603386516969, "learning_rate": 8.039731280652368e-06, "loss": 17.8554, "step": 17137 }, { "epoch": 0.31326886870053194, "grad_norm": 7.202105499569859, "learning_rate": 8.039496248623436e-06, "loss": 18.0454, "step": 17138 }, { "epoch": 0.3132871478969784, "grad_norm": 7.411083961802374, "learning_rate": 8.03926120594133e-06, "loss": 17.7921, "step": 17139 }, { "epoch": 0.31330542709342496, "grad_norm": 7.784527847208358, "learning_rate": 8.039026152606883e-06, "loss": 18.0465, "step": 17140 }, { "epoch": 0.3133237062898715, "grad_norm": 6.618673361962217, "learning_rate": 8.038791088620909e-06, "loss": 17.4875, "step": 17141 }, { "epoch": 0.31334198548631803, "grad_norm": 5.812691420482996, "learning_rate": 8.038556013984239e-06, "loss": 17.3837, "step": 17142 }, { "epoch": 0.31336026468276457, "grad_norm": 7.11314530167224, "learning_rate": 8.038320928697691e-06, "loss": 17.7925, "step": 17143 }, { "epoch": 0.31337854387921105, "grad_norm": 7.7265097697341, "learning_rate": 8.038085832762095e-06, "loss": 18.3554, "step": 17144 }, { "epoch": 0.3133968230756576, "grad_norm": 5.447993047607807, "learning_rate": 8.037850726178269e-06, "loss": 17.1864, "step": 17145 }, { "epoch": 0.3134151022721041, "grad_norm": 6.728960990362715, "learning_rate": 8.037615608947041e-06, "loss": 17.7867, "step": 17146 }, { "epoch": 0.31343338146855065, "grad_norm": 7.495857224980962, "learning_rate": 8.037380481069234e-06, "loss": 17.8913, "step": 17147 }, { "epoch": 0.3134516606649972, "grad_norm": 7.049243686139036, "learning_rate": 8.037145342545671e-06, "loss": 17.8628, "step": 17148 }, { "epoch": 0.31346993986144367, "grad_norm": 4.711646748515058, "learning_rate": 8.036910193377178e-06, "loss": 16.9872, "step": 17149 }, { "epoch": 0.3134882190578902, "grad_norm": 5.493751215946904, "learning_rate": 8.036675033564579e-06, "loss": 17.0835, "step": 17150 }, { "epoch": 0.31350649825433674, "grad_norm": 5.888501284564506, "learning_rate": 8.036439863108696e-06, "loss": 17.3825, "step": 17151 }, { "epoch": 0.3135247774507833, "grad_norm": 6.046177868777849, "learning_rate": 8.036204682010355e-06, "loss": 17.332, "step": 17152 }, { "epoch": 0.3135430566472298, "grad_norm": 6.575959579527793, "learning_rate": 8.03596949027038e-06, "loss": 17.8634, "step": 17153 }, { "epoch": 0.3135613358436763, "grad_norm": 6.607339601223778, "learning_rate": 8.035734287889597e-06, "loss": 17.6993, "step": 17154 }, { "epoch": 0.3135796150401228, "grad_norm": 6.441259428163981, "learning_rate": 8.035499074868827e-06, "loss": 17.6536, "step": 17155 }, { "epoch": 0.31359789423656936, "grad_norm": 6.082592525515024, "learning_rate": 8.035263851208897e-06, "loss": 17.4241, "step": 17156 }, { "epoch": 0.3136161734330159, "grad_norm": 6.054719549805286, "learning_rate": 8.03502861691063e-06, "loss": 17.4872, "step": 17157 }, { "epoch": 0.31363445262946243, "grad_norm": 6.86476340893, "learning_rate": 8.034793371974851e-06, "loss": 17.5494, "step": 17158 }, { "epoch": 0.3136527318259089, "grad_norm": 6.060835396448324, "learning_rate": 8.034558116402386e-06, "loss": 17.4649, "step": 17159 }, { "epoch": 0.31367101102235545, "grad_norm": 7.512181779187408, "learning_rate": 8.034322850194056e-06, "loss": 17.8525, "step": 17160 }, { "epoch": 0.313689290218802, "grad_norm": 7.245479470438559, "learning_rate": 8.034087573350689e-06, "loss": 17.899, "step": 17161 }, { "epoch": 0.3137075694152485, "grad_norm": 7.1684862366743936, "learning_rate": 8.03385228587311e-06, "loss": 17.7725, "step": 17162 }, { "epoch": 0.31372584861169506, "grad_norm": 8.30358114454457, "learning_rate": 8.033616987762138e-06, "loss": 17.719, "step": 17163 }, { "epoch": 0.31374412780814154, "grad_norm": 6.560198161467762, "learning_rate": 8.033381679018605e-06, "loss": 17.4197, "step": 17164 }, { "epoch": 0.3137624070045881, "grad_norm": 7.377941426853494, "learning_rate": 8.033146359643332e-06, "loss": 17.6037, "step": 17165 }, { "epoch": 0.3137806862010346, "grad_norm": 5.557929775709622, "learning_rate": 8.032911029637143e-06, "loss": 17.1505, "step": 17166 }, { "epoch": 0.31379896539748114, "grad_norm": 6.899013606209825, "learning_rate": 8.032675689000864e-06, "loss": 17.6968, "step": 17167 }, { "epoch": 0.3138172445939276, "grad_norm": 6.5191416595380485, "learning_rate": 8.032440337735322e-06, "loss": 17.5975, "step": 17168 }, { "epoch": 0.31383552379037416, "grad_norm": 7.606407530181519, "learning_rate": 8.032204975841337e-06, "loss": 17.8917, "step": 17169 }, { "epoch": 0.3138538029868207, "grad_norm": 7.05771477842777, "learning_rate": 8.031969603319737e-06, "loss": 17.4604, "step": 17170 }, { "epoch": 0.31387208218326723, "grad_norm": 5.3322142199112195, "learning_rate": 8.031734220171349e-06, "loss": 17.1174, "step": 17171 }, { "epoch": 0.31389036137971377, "grad_norm": 7.02892773669566, "learning_rate": 8.031498826396992e-06, "loss": 17.8542, "step": 17172 }, { "epoch": 0.31390864057616025, "grad_norm": 9.054141376018855, "learning_rate": 8.031263421997497e-06, "loss": 18.8066, "step": 17173 }, { "epoch": 0.3139269197726068, "grad_norm": 6.130279888692038, "learning_rate": 8.031028006973686e-06, "loss": 17.7783, "step": 17174 }, { "epoch": 0.3139451989690533, "grad_norm": 5.995388164254831, "learning_rate": 8.030792581326388e-06, "loss": 17.4762, "step": 17175 }, { "epoch": 0.31396347816549985, "grad_norm": 5.885826294439638, "learning_rate": 8.030557145056421e-06, "loss": 17.3577, "step": 17176 }, { "epoch": 0.3139817573619464, "grad_norm": 6.60999595930151, "learning_rate": 8.030321698164616e-06, "loss": 17.6923, "step": 17177 }, { "epoch": 0.31400003655839287, "grad_norm": 6.843984019058763, "learning_rate": 8.030086240651796e-06, "loss": 17.6676, "step": 17178 }, { "epoch": 0.3140183157548394, "grad_norm": 5.883915114984477, "learning_rate": 8.029850772518787e-06, "loss": 17.2554, "step": 17179 }, { "epoch": 0.31403659495128594, "grad_norm": 6.892014099833698, "learning_rate": 8.029615293766413e-06, "loss": 17.8833, "step": 17180 }, { "epoch": 0.3140548741477325, "grad_norm": 6.339557791548552, "learning_rate": 8.029379804395501e-06, "loss": 17.4732, "step": 17181 }, { "epoch": 0.314073153344179, "grad_norm": 7.260205023241176, "learning_rate": 8.029144304406875e-06, "loss": 17.8128, "step": 17182 }, { "epoch": 0.3140914325406255, "grad_norm": 5.817146925384382, "learning_rate": 8.028908793801362e-06, "loss": 17.3207, "step": 17183 }, { "epoch": 0.31410971173707203, "grad_norm": 6.626224046216491, "learning_rate": 8.028673272579786e-06, "loss": 17.6731, "step": 17184 }, { "epoch": 0.31412799093351856, "grad_norm": 8.369654425397433, "learning_rate": 8.028437740742974e-06, "loss": 18.2265, "step": 17185 }, { "epoch": 0.3141462701299651, "grad_norm": 9.432428565195123, "learning_rate": 8.028202198291749e-06, "loss": 18.692, "step": 17186 }, { "epoch": 0.31416454932641164, "grad_norm": 4.921619769724041, "learning_rate": 8.02796664522694e-06, "loss": 16.8349, "step": 17187 }, { "epoch": 0.3141828285228581, "grad_norm": 8.626537819541797, "learning_rate": 8.027731081549368e-06, "loss": 18.0901, "step": 17188 }, { "epoch": 0.31420110771930465, "grad_norm": 7.517223221411115, "learning_rate": 8.027495507259863e-06, "loss": 17.8854, "step": 17189 }, { "epoch": 0.3142193869157512, "grad_norm": 6.503608465111523, "learning_rate": 8.027259922359248e-06, "loss": 17.7484, "step": 17190 }, { "epoch": 0.3142376661121977, "grad_norm": 6.030001566174185, "learning_rate": 8.02702432684835e-06, "loss": 17.3925, "step": 17191 }, { "epoch": 0.31425594530864426, "grad_norm": 6.216164772313948, "learning_rate": 8.026788720727997e-06, "loss": 17.5722, "step": 17192 }, { "epoch": 0.31427422450509074, "grad_norm": 5.947817891194216, "learning_rate": 8.026553103999009e-06, "loss": 17.5399, "step": 17193 }, { "epoch": 0.3142925037015373, "grad_norm": 6.695638931461703, "learning_rate": 8.026317476662215e-06, "loss": 17.8152, "step": 17194 }, { "epoch": 0.3143107828979838, "grad_norm": 7.016799302824753, "learning_rate": 8.026081838718442e-06, "loss": 17.458, "step": 17195 }, { "epoch": 0.31432906209443034, "grad_norm": 5.769310967622843, "learning_rate": 8.025846190168515e-06, "loss": 17.3009, "step": 17196 }, { "epoch": 0.3143473412908769, "grad_norm": 5.257616943946488, "learning_rate": 8.02561053101326e-06, "loss": 16.9891, "step": 17197 }, { "epoch": 0.31436562048732336, "grad_norm": 6.439660213076309, "learning_rate": 8.0253748612535e-06, "loss": 17.6261, "step": 17198 }, { "epoch": 0.3143838996837699, "grad_norm": 7.616382543381116, "learning_rate": 8.025139180890066e-06, "loss": 17.825, "step": 17199 }, { "epoch": 0.31440217888021643, "grad_norm": 6.859843867417869, "learning_rate": 8.02490348992378e-06, "loss": 17.8512, "step": 17200 }, { "epoch": 0.31442045807666297, "grad_norm": 6.012777268791715, "learning_rate": 8.024667788355473e-06, "loss": 17.4365, "step": 17201 }, { "epoch": 0.31443873727310945, "grad_norm": 7.34281656927429, "learning_rate": 8.024432076185967e-06, "loss": 18.0848, "step": 17202 }, { "epoch": 0.314457016469556, "grad_norm": 7.1363276984549735, "learning_rate": 8.024196353416085e-06, "loss": 17.8839, "step": 17203 }, { "epoch": 0.3144752956660025, "grad_norm": 6.619445406609291, "learning_rate": 8.023960620046661e-06, "loss": 17.512, "step": 17204 }, { "epoch": 0.31449357486244905, "grad_norm": 5.901356843278337, "learning_rate": 8.023724876078517e-06, "loss": 17.5128, "step": 17205 }, { "epoch": 0.3145118540588956, "grad_norm": 5.377358834635803, "learning_rate": 8.023489121512479e-06, "loss": 17.0316, "step": 17206 }, { "epoch": 0.31453013325534207, "grad_norm": 9.555391015931994, "learning_rate": 8.023253356349375e-06, "loss": 18.6005, "step": 17207 }, { "epoch": 0.3145484124517886, "grad_norm": 6.7078403560945405, "learning_rate": 8.023017580590029e-06, "loss": 17.5261, "step": 17208 }, { "epoch": 0.31456669164823514, "grad_norm": 6.47132507059488, "learning_rate": 8.022781794235268e-06, "loss": 17.8296, "step": 17209 }, { "epoch": 0.3145849708446817, "grad_norm": 8.109703898154141, "learning_rate": 8.02254599728592e-06, "loss": 18.1307, "step": 17210 }, { "epoch": 0.3146032500411282, "grad_norm": 6.5760029145144525, "learning_rate": 8.022310189742812e-06, "loss": 17.5782, "step": 17211 }, { "epoch": 0.3146215292375747, "grad_norm": 8.482911353667058, "learning_rate": 8.022074371606767e-06, "loss": 18.6695, "step": 17212 }, { "epoch": 0.31463980843402123, "grad_norm": 6.977732439286522, "learning_rate": 8.021838542878616e-06, "loss": 17.9351, "step": 17213 }, { "epoch": 0.31465808763046776, "grad_norm": 5.743098806772147, "learning_rate": 8.02160270355918e-06, "loss": 17.3204, "step": 17214 }, { "epoch": 0.3146763668269143, "grad_norm": 7.267947435405922, "learning_rate": 8.02136685364929e-06, "loss": 17.5826, "step": 17215 }, { "epoch": 0.31469464602336084, "grad_norm": 7.912004941148593, "learning_rate": 8.021130993149773e-06, "loss": 17.9818, "step": 17216 }, { "epoch": 0.3147129252198073, "grad_norm": 5.280419012227969, "learning_rate": 8.020895122061454e-06, "loss": 16.951, "step": 17217 }, { "epoch": 0.31473120441625385, "grad_norm": 6.873611055670704, "learning_rate": 8.020659240385157e-06, "loss": 17.6168, "step": 17218 }, { "epoch": 0.3147494836127004, "grad_norm": 8.206195024040627, "learning_rate": 8.020423348121713e-06, "loss": 18.5465, "step": 17219 }, { "epoch": 0.3147677628091469, "grad_norm": 6.962185452648408, "learning_rate": 8.020187445271947e-06, "loss": 17.6953, "step": 17220 }, { "epoch": 0.31478604200559346, "grad_norm": 7.088755955462132, "learning_rate": 8.019951531836686e-06, "loss": 17.9777, "step": 17221 }, { "epoch": 0.31480432120203994, "grad_norm": 6.190448969393758, "learning_rate": 8.019715607816758e-06, "loss": 17.3383, "step": 17222 }, { "epoch": 0.3148226003984865, "grad_norm": 6.980175547078554, "learning_rate": 8.019479673212988e-06, "loss": 17.8941, "step": 17223 }, { "epoch": 0.314840879594933, "grad_norm": 6.766442043943911, "learning_rate": 8.019243728026204e-06, "loss": 17.7019, "step": 17224 }, { "epoch": 0.31485915879137955, "grad_norm": 8.86342108336854, "learning_rate": 8.019007772257233e-06, "loss": 18.7092, "step": 17225 }, { "epoch": 0.3148774379878261, "grad_norm": 6.873461870005551, "learning_rate": 8.0187718059069e-06, "loss": 17.7726, "step": 17226 }, { "epoch": 0.31489571718427256, "grad_norm": 7.5290291440852535, "learning_rate": 8.018535828976035e-06, "loss": 18.0024, "step": 17227 }, { "epoch": 0.3149139963807191, "grad_norm": 7.28258231935665, "learning_rate": 8.018299841465464e-06, "loss": 17.7803, "step": 17228 }, { "epoch": 0.31493227557716563, "grad_norm": 5.672384628255509, "learning_rate": 8.018063843376014e-06, "loss": 17.1077, "step": 17229 }, { "epoch": 0.31495055477361217, "grad_norm": 6.763207668043426, "learning_rate": 8.017827834708513e-06, "loss": 17.8968, "step": 17230 }, { "epoch": 0.3149688339700587, "grad_norm": 6.352084908004501, "learning_rate": 8.017591815463785e-06, "loss": 17.9149, "step": 17231 }, { "epoch": 0.3149871131665052, "grad_norm": 5.401660204723154, "learning_rate": 8.017355785642661e-06, "loss": 17.0605, "step": 17232 }, { "epoch": 0.3150053923629517, "grad_norm": 6.3157623324548124, "learning_rate": 8.017119745245969e-06, "loss": 17.4241, "step": 17233 }, { "epoch": 0.31502367155939826, "grad_norm": 7.187095964596448, "learning_rate": 8.01688369427453e-06, "loss": 17.8613, "step": 17234 }, { "epoch": 0.3150419507558448, "grad_norm": 6.45123413937246, "learning_rate": 8.016647632729177e-06, "loss": 17.6179, "step": 17235 }, { "epoch": 0.31506022995229127, "grad_norm": 6.373516121494759, "learning_rate": 8.016411560610737e-06, "loss": 17.4736, "step": 17236 }, { "epoch": 0.3150785091487378, "grad_norm": 7.561806774375053, "learning_rate": 8.016175477920036e-06, "loss": 17.8736, "step": 17237 }, { "epoch": 0.31509678834518434, "grad_norm": 8.012843117739664, "learning_rate": 8.015939384657901e-06, "loss": 18.4852, "step": 17238 }, { "epoch": 0.3151150675416309, "grad_norm": 6.270369077479934, "learning_rate": 8.015703280825158e-06, "loss": 17.4573, "step": 17239 }, { "epoch": 0.3151333467380774, "grad_norm": 7.8125620468099095, "learning_rate": 8.015467166422641e-06, "loss": 18.0803, "step": 17240 }, { "epoch": 0.3151516259345239, "grad_norm": 6.951899482123191, "learning_rate": 8.01523104145117e-06, "loss": 17.6458, "step": 17241 }, { "epoch": 0.31516990513097043, "grad_norm": 7.537431181425768, "learning_rate": 8.014994905911577e-06, "loss": 17.6453, "step": 17242 }, { "epoch": 0.31518818432741696, "grad_norm": 7.574629766724625, "learning_rate": 8.014758759804688e-06, "loss": 17.9085, "step": 17243 }, { "epoch": 0.3152064635238635, "grad_norm": 6.280790363627346, "learning_rate": 8.014522603131332e-06, "loss": 17.7072, "step": 17244 }, { "epoch": 0.31522474272031004, "grad_norm": 6.663546949785355, "learning_rate": 8.014286435892335e-06, "loss": 17.7714, "step": 17245 }, { "epoch": 0.3152430219167565, "grad_norm": 5.644993620118886, "learning_rate": 8.014050258088527e-06, "loss": 17.1598, "step": 17246 }, { "epoch": 0.31526130111320305, "grad_norm": 6.549006480041142, "learning_rate": 8.013814069720733e-06, "loss": 17.5526, "step": 17247 }, { "epoch": 0.3152795803096496, "grad_norm": 6.563431303899967, "learning_rate": 8.013577870789783e-06, "loss": 17.3868, "step": 17248 }, { "epoch": 0.3152978595060961, "grad_norm": 5.361171220750779, "learning_rate": 8.013341661296503e-06, "loss": 17.0847, "step": 17249 }, { "epoch": 0.31531613870254266, "grad_norm": 8.254859259003663, "learning_rate": 8.013105441241722e-06, "loss": 18.2838, "step": 17250 }, { "epoch": 0.31533441789898914, "grad_norm": 7.036847900780292, "learning_rate": 8.01286921062627e-06, "loss": 17.8468, "step": 17251 }, { "epoch": 0.3153526970954357, "grad_norm": 9.258121444683413, "learning_rate": 8.012632969450971e-06, "loss": 18.5679, "step": 17252 }, { "epoch": 0.3153709762918822, "grad_norm": 7.0027051711961015, "learning_rate": 8.012396717716655e-06, "loss": 17.6236, "step": 17253 }, { "epoch": 0.31538925548832875, "grad_norm": 7.772629492817567, "learning_rate": 8.01216045542415e-06, "loss": 18.0685, "step": 17254 }, { "epoch": 0.3154075346847753, "grad_norm": 6.821788964207853, "learning_rate": 8.011924182574285e-06, "loss": 17.4284, "step": 17255 }, { "epoch": 0.31542581388122176, "grad_norm": 8.243962954711291, "learning_rate": 8.011687899167885e-06, "loss": 17.9809, "step": 17256 }, { "epoch": 0.3154440930776683, "grad_norm": 5.941002081332321, "learning_rate": 8.011451605205782e-06, "loss": 17.2933, "step": 17257 }, { "epoch": 0.31546237227411483, "grad_norm": 5.7085955853111665, "learning_rate": 8.011215300688803e-06, "loss": 17.4081, "step": 17258 }, { "epoch": 0.31548065147056137, "grad_norm": 7.489969880342292, "learning_rate": 8.010978985617775e-06, "loss": 18.0581, "step": 17259 }, { "epoch": 0.3154989306670079, "grad_norm": 6.170305090550007, "learning_rate": 8.010742659993525e-06, "loss": 17.3531, "step": 17260 }, { "epoch": 0.3155172098634544, "grad_norm": 5.761930461774254, "learning_rate": 8.010506323816886e-06, "loss": 17.1373, "step": 17261 }, { "epoch": 0.3155354890599009, "grad_norm": 7.579984190464123, "learning_rate": 8.010269977088684e-06, "loss": 18.2122, "step": 17262 }, { "epoch": 0.31555376825634746, "grad_norm": 8.04920914814461, "learning_rate": 8.010033619809744e-06, "loss": 18.2227, "step": 17263 }, { "epoch": 0.315572047452794, "grad_norm": 6.980271621424535, "learning_rate": 8.009797251980898e-06, "loss": 17.4735, "step": 17264 }, { "epoch": 0.3155903266492405, "grad_norm": 7.203646358916323, "learning_rate": 8.009560873602976e-06, "loss": 17.9982, "step": 17265 }, { "epoch": 0.315608605845687, "grad_norm": 7.014703836732413, "learning_rate": 8.009324484676801e-06, "loss": 17.7615, "step": 17266 }, { "epoch": 0.31562688504213354, "grad_norm": 8.962230500198741, "learning_rate": 8.009088085203207e-06, "loss": 18.1978, "step": 17267 }, { "epoch": 0.3156451642385801, "grad_norm": 6.798689249987276, "learning_rate": 8.00885167518302e-06, "loss": 17.7435, "step": 17268 }, { "epoch": 0.3156634434350266, "grad_norm": 6.71692463745285, "learning_rate": 8.00861525461707e-06, "loss": 17.6106, "step": 17269 }, { "epoch": 0.3156817226314731, "grad_norm": 6.0212322090815045, "learning_rate": 8.008378823506183e-06, "loss": 17.4011, "step": 17270 }, { "epoch": 0.31570000182791963, "grad_norm": 6.323795571367101, "learning_rate": 8.008142381851191e-06, "loss": 17.339, "step": 17271 }, { "epoch": 0.31571828102436617, "grad_norm": 5.162207674821056, "learning_rate": 8.007905929652919e-06, "loss": 16.9637, "step": 17272 }, { "epoch": 0.3157365602208127, "grad_norm": 6.40622354301321, "learning_rate": 8.007669466912197e-06, "loss": 17.2422, "step": 17273 }, { "epoch": 0.31575483941725924, "grad_norm": 6.380792081899297, "learning_rate": 8.007432993629857e-06, "loss": 17.3429, "step": 17274 }, { "epoch": 0.3157731186137057, "grad_norm": 7.435868585263863, "learning_rate": 8.007196509806724e-06, "loss": 17.7877, "step": 17275 }, { "epoch": 0.31579139781015225, "grad_norm": 9.257972846005018, "learning_rate": 8.00696001544363e-06, "loss": 18.0455, "step": 17276 }, { "epoch": 0.3158096770065988, "grad_norm": 6.444610897364533, "learning_rate": 8.006723510541401e-06, "loss": 17.6732, "step": 17277 }, { "epoch": 0.3158279562030453, "grad_norm": 6.669997196515607, "learning_rate": 8.006486995100866e-06, "loss": 17.6559, "step": 17278 }, { "epoch": 0.31584623539949186, "grad_norm": 5.975750319203101, "learning_rate": 8.006250469122857e-06, "loss": 17.3374, "step": 17279 }, { "epoch": 0.31586451459593834, "grad_norm": 6.378632875562156, "learning_rate": 8.0060139326082e-06, "loss": 17.4521, "step": 17280 }, { "epoch": 0.3158827937923849, "grad_norm": 6.1339635490609234, "learning_rate": 8.005777385557723e-06, "loss": 17.3795, "step": 17281 }, { "epoch": 0.3159010729888314, "grad_norm": 7.2323692727419315, "learning_rate": 8.005540827972259e-06, "loss": 17.9336, "step": 17282 }, { "epoch": 0.31591935218527795, "grad_norm": 5.936761222618097, "learning_rate": 8.005304259852636e-06, "loss": 17.2524, "step": 17283 }, { "epoch": 0.3159376313817245, "grad_norm": 6.986959874057104, "learning_rate": 8.00506768119968e-06, "loss": 17.6888, "step": 17284 }, { "epoch": 0.31595591057817096, "grad_norm": 6.618194378270937, "learning_rate": 8.004831092014224e-06, "loss": 17.7176, "step": 17285 }, { "epoch": 0.3159741897746175, "grad_norm": 6.618019920452158, "learning_rate": 8.004594492297095e-06, "loss": 17.5945, "step": 17286 }, { "epoch": 0.31599246897106403, "grad_norm": 6.229218484130583, "learning_rate": 8.004357882049125e-06, "loss": 17.3467, "step": 17287 }, { "epoch": 0.31601074816751057, "grad_norm": 6.583558412131839, "learning_rate": 8.004121261271139e-06, "loss": 17.7141, "step": 17288 }, { "epoch": 0.3160290273639571, "grad_norm": 5.570932105293516, "learning_rate": 8.00388462996397e-06, "loss": 17.0904, "step": 17289 }, { "epoch": 0.3160473065604036, "grad_norm": 6.773103782373799, "learning_rate": 8.003647988128447e-06, "loss": 17.823, "step": 17290 }, { "epoch": 0.3160655857568501, "grad_norm": 7.140574485216784, "learning_rate": 8.003411335765397e-06, "loss": 17.8718, "step": 17291 }, { "epoch": 0.31608386495329666, "grad_norm": 7.65391096461442, "learning_rate": 8.00317467287565e-06, "loss": 18.1112, "step": 17292 }, { "epoch": 0.3161021441497432, "grad_norm": 6.4821818518534196, "learning_rate": 8.002937999460038e-06, "loss": 17.4704, "step": 17293 }, { "epoch": 0.3161204233461897, "grad_norm": 4.8915957927763465, "learning_rate": 8.002701315519388e-06, "loss": 16.8415, "step": 17294 }, { "epoch": 0.3161387025426362, "grad_norm": 6.98314453855389, "learning_rate": 8.002464621054531e-06, "loss": 17.8192, "step": 17295 }, { "epoch": 0.31615698173908274, "grad_norm": 5.695664433405737, "learning_rate": 8.002227916066297e-06, "loss": 17.4068, "step": 17296 }, { "epoch": 0.3161752609355293, "grad_norm": 7.0937901152485265, "learning_rate": 8.001991200555512e-06, "loss": 18.2263, "step": 17297 }, { "epoch": 0.3161935401319758, "grad_norm": 6.613123201057216, "learning_rate": 8.00175447452301e-06, "loss": 17.7912, "step": 17298 }, { "epoch": 0.31621181932842235, "grad_norm": 5.779613572039592, "learning_rate": 8.00151773796962e-06, "loss": 17.1359, "step": 17299 }, { "epoch": 0.31623009852486883, "grad_norm": 5.56224046592532, "learning_rate": 8.00128099089617e-06, "loss": 17.1956, "step": 17300 }, { "epoch": 0.31624837772131537, "grad_norm": 5.978160083714933, "learning_rate": 8.00104423330349e-06, "loss": 17.1161, "step": 17301 }, { "epoch": 0.3162666569177619, "grad_norm": 7.990643806813121, "learning_rate": 8.000807465192411e-06, "loss": 18.4523, "step": 17302 }, { "epoch": 0.31628493611420844, "grad_norm": 5.567845383398115, "learning_rate": 8.00057068656376e-06, "loss": 17.1607, "step": 17303 }, { "epoch": 0.3163032153106549, "grad_norm": 5.922321863482638, "learning_rate": 8.000333897418372e-06, "loss": 17.5144, "step": 17304 }, { "epoch": 0.31632149450710145, "grad_norm": 5.705829061206794, "learning_rate": 8.000097097757072e-06, "loss": 17.2625, "step": 17305 }, { "epoch": 0.316339773703548, "grad_norm": 6.227613307187678, "learning_rate": 7.999860287580694e-06, "loss": 17.6444, "step": 17306 }, { "epoch": 0.3163580528999945, "grad_norm": 7.018679087373992, "learning_rate": 7.999623466890065e-06, "loss": 17.5985, "step": 17307 }, { "epoch": 0.31637633209644106, "grad_norm": 6.960499383597473, "learning_rate": 7.999386635686016e-06, "loss": 17.6521, "step": 17308 }, { "epoch": 0.31639461129288754, "grad_norm": 6.187277667793289, "learning_rate": 7.999149793969377e-06, "loss": 17.3097, "step": 17309 }, { "epoch": 0.3164128904893341, "grad_norm": 6.838715198564892, "learning_rate": 7.99891294174098e-06, "loss": 17.6548, "step": 17310 }, { "epoch": 0.3164311696857806, "grad_norm": 7.189376662082501, "learning_rate": 7.998676079001651e-06, "loss": 17.9041, "step": 17311 }, { "epoch": 0.31644944888222715, "grad_norm": 6.9285400299559585, "learning_rate": 7.998439205752222e-06, "loss": 17.7576, "step": 17312 }, { "epoch": 0.3164677280786737, "grad_norm": 6.837096459838117, "learning_rate": 7.998202321993527e-06, "loss": 17.5272, "step": 17313 }, { "epoch": 0.31648600727512016, "grad_norm": 6.5943505560026425, "learning_rate": 7.997965427726391e-06, "loss": 17.3491, "step": 17314 }, { "epoch": 0.3165042864715667, "grad_norm": 6.8997158881354235, "learning_rate": 7.997728522951646e-06, "loss": 18.0895, "step": 17315 }, { "epoch": 0.31652256566801323, "grad_norm": 5.475364935696605, "learning_rate": 7.997491607670123e-06, "loss": 17.2759, "step": 17316 }, { "epoch": 0.31654084486445977, "grad_norm": 6.76389765174802, "learning_rate": 7.997254681882652e-06, "loss": 17.7249, "step": 17317 }, { "epoch": 0.3165591240609063, "grad_norm": 6.563208550847732, "learning_rate": 7.997017745590064e-06, "loss": 17.7053, "step": 17318 }, { "epoch": 0.3165774032573528, "grad_norm": 6.285419678293115, "learning_rate": 7.996780798793187e-06, "loss": 17.2764, "step": 17319 }, { "epoch": 0.3165956824537993, "grad_norm": 6.2225713999306125, "learning_rate": 7.996543841492857e-06, "loss": 17.4828, "step": 17320 }, { "epoch": 0.31661396165024586, "grad_norm": 6.265834466442455, "learning_rate": 7.996306873689899e-06, "loss": 17.2636, "step": 17321 }, { "epoch": 0.3166322408466924, "grad_norm": 6.726600881578628, "learning_rate": 7.996069895385143e-06, "loss": 17.4049, "step": 17322 }, { "epoch": 0.3166505200431389, "grad_norm": 6.485672045106972, "learning_rate": 7.995832906579426e-06, "loss": 17.3534, "step": 17323 }, { "epoch": 0.3166687992395854, "grad_norm": 8.904328244583729, "learning_rate": 7.995595907273573e-06, "loss": 18.7114, "step": 17324 }, { "epoch": 0.31668707843603194, "grad_norm": 6.148101540336492, "learning_rate": 7.995358897468414e-06, "loss": 17.4741, "step": 17325 }, { "epoch": 0.3167053576324785, "grad_norm": 6.932768245039868, "learning_rate": 7.995121877164784e-06, "loss": 17.4276, "step": 17326 }, { "epoch": 0.316723636828925, "grad_norm": 7.669086120789412, "learning_rate": 7.994884846363513e-06, "loss": 17.7421, "step": 17327 }, { "epoch": 0.31674191602537155, "grad_norm": 5.84113389394122, "learning_rate": 7.99464780506543e-06, "loss": 17.3566, "step": 17328 }, { "epoch": 0.31676019522181803, "grad_norm": 6.880726002467942, "learning_rate": 7.994410753271365e-06, "loss": 17.6299, "step": 17329 }, { "epoch": 0.31677847441826457, "grad_norm": 6.734761939125572, "learning_rate": 7.99417369098215e-06, "loss": 17.988, "step": 17330 }, { "epoch": 0.3167967536147111, "grad_norm": 6.721574535046607, "learning_rate": 7.993936618198616e-06, "loss": 17.5829, "step": 17331 }, { "epoch": 0.31681503281115764, "grad_norm": 6.424721329456516, "learning_rate": 7.993699534921594e-06, "loss": 17.7047, "step": 17332 }, { "epoch": 0.3168333120076042, "grad_norm": 7.5887974576988615, "learning_rate": 7.993462441151918e-06, "loss": 17.8649, "step": 17333 }, { "epoch": 0.31685159120405065, "grad_norm": 5.820842922740516, "learning_rate": 7.993225336890414e-06, "loss": 17.3976, "step": 17334 }, { "epoch": 0.3168698704004972, "grad_norm": 6.607944164689301, "learning_rate": 7.992988222137914e-06, "loss": 17.5775, "step": 17335 }, { "epoch": 0.3168881495969437, "grad_norm": 6.239762600115004, "learning_rate": 7.99275109689525e-06, "loss": 17.5336, "step": 17336 }, { "epoch": 0.31690642879339026, "grad_norm": 6.274828033999698, "learning_rate": 7.992513961163253e-06, "loss": 17.3763, "step": 17337 }, { "epoch": 0.31692470798983674, "grad_norm": 7.08519694139076, "learning_rate": 7.992276814942756e-06, "loss": 17.5813, "step": 17338 }, { "epoch": 0.3169429871862833, "grad_norm": 7.3140485087203375, "learning_rate": 7.992039658234586e-06, "loss": 17.8942, "step": 17339 }, { "epoch": 0.3169612663827298, "grad_norm": 7.207227940097351, "learning_rate": 7.99180249103958e-06, "loss": 18.0972, "step": 17340 }, { "epoch": 0.31697954557917635, "grad_norm": 6.858963748711504, "learning_rate": 7.991565313358562e-06, "loss": 17.7875, "step": 17341 }, { "epoch": 0.3169978247756229, "grad_norm": 6.556687275015834, "learning_rate": 7.991328125192368e-06, "loss": 17.3149, "step": 17342 }, { "epoch": 0.31701610397206936, "grad_norm": 6.490040996250598, "learning_rate": 7.99109092654183e-06, "loss": 18.3425, "step": 17343 }, { "epoch": 0.3170343831685159, "grad_norm": 5.979798621442256, "learning_rate": 7.990853717407778e-06, "loss": 17.4498, "step": 17344 }, { "epoch": 0.31705266236496243, "grad_norm": 7.520527607230535, "learning_rate": 7.990616497791043e-06, "loss": 18.1362, "step": 17345 }, { "epoch": 0.31707094156140897, "grad_norm": 8.00210194451691, "learning_rate": 7.990379267692455e-06, "loss": 17.9182, "step": 17346 }, { "epoch": 0.3170892207578555, "grad_norm": 6.0069612982067415, "learning_rate": 7.990142027112849e-06, "loss": 17.13, "step": 17347 }, { "epoch": 0.317107499954302, "grad_norm": 7.431075068686617, "learning_rate": 7.989904776053054e-06, "loss": 17.8591, "step": 17348 }, { "epoch": 0.3171257791507485, "grad_norm": 7.194017830536698, "learning_rate": 7.989667514513903e-06, "loss": 17.9881, "step": 17349 }, { "epoch": 0.31714405834719506, "grad_norm": 7.2560760693483894, "learning_rate": 7.989430242496226e-06, "loss": 18.1042, "step": 17350 }, { "epoch": 0.3171623375436416, "grad_norm": 6.127488058005536, "learning_rate": 7.989192960000855e-06, "loss": 17.3964, "step": 17351 }, { "epoch": 0.31718061674008813, "grad_norm": 6.820769982935315, "learning_rate": 7.988955667028622e-06, "loss": 17.7718, "step": 17352 }, { "epoch": 0.3171988959365346, "grad_norm": 5.801515730191406, "learning_rate": 7.988718363580359e-06, "loss": 17.4517, "step": 17353 }, { "epoch": 0.31721717513298114, "grad_norm": 6.102441621651423, "learning_rate": 7.9884810496569e-06, "loss": 17.2964, "step": 17354 }, { "epoch": 0.3172354543294277, "grad_norm": 5.961007345755837, "learning_rate": 7.988243725259071e-06, "loss": 17.3146, "step": 17355 }, { "epoch": 0.3172537335258742, "grad_norm": 6.451354412838017, "learning_rate": 7.988006390387707e-06, "loss": 17.4721, "step": 17356 }, { "epoch": 0.31727201272232075, "grad_norm": 6.165619916006797, "learning_rate": 7.98776904504364e-06, "loss": 17.3525, "step": 17357 }, { "epoch": 0.31729029191876723, "grad_norm": 6.448672229159587, "learning_rate": 7.987531689227705e-06, "loss": 17.5783, "step": 17358 }, { "epoch": 0.31730857111521377, "grad_norm": 6.547768556287841, "learning_rate": 7.987294322940728e-06, "loss": 17.629, "step": 17359 }, { "epoch": 0.3173268503116603, "grad_norm": 6.2012472626810355, "learning_rate": 7.987056946183544e-06, "loss": 17.5342, "step": 17360 }, { "epoch": 0.31734512950810684, "grad_norm": 7.712075728461025, "learning_rate": 7.986819558956984e-06, "loss": 18.4234, "step": 17361 }, { "epoch": 0.3173634087045534, "grad_norm": 7.181988503389922, "learning_rate": 7.986582161261881e-06, "loss": 17.7457, "step": 17362 }, { "epoch": 0.31738168790099985, "grad_norm": 5.440387236226897, "learning_rate": 7.986344753099067e-06, "loss": 17.1153, "step": 17363 }, { "epoch": 0.3173999670974464, "grad_norm": 5.29858840328652, "learning_rate": 7.986107334469374e-06, "loss": 17.1398, "step": 17364 }, { "epoch": 0.3174182462938929, "grad_norm": 6.48987557252202, "learning_rate": 7.985869905373635e-06, "loss": 17.6772, "step": 17365 }, { "epoch": 0.31743652549033946, "grad_norm": 7.141359406982921, "learning_rate": 7.985632465812679e-06, "loss": 17.8821, "step": 17366 }, { "epoch": 0.317454804686786, "grad_norm": 6.660119386170312, "learning_rate": 7.98539501578734e-06, "loss": 17.6982, "step": 17367 }, { "epoch": 0.3174730838832325, "grad_norm": 5.7470754973462554, "learning_rate": 7.985157555298453e-06, "loss": 17.2834, "step": 17368 }, { "epoch": 0.317491363079679, "grad_norm": 5.582415356984512, "learning_rate": 7.984920084346845e-06, "loss": 17.2948, "step": 17369 }, { "epoch": 0.31750964227612555, "grad_norm": 7.13506128815316, "learning_rate": 7.984682602933353e-06, "loss": 17.8519, "step": 17370 }, { "epoch": 0.3175279214725721, "grad_norm": 7.337779130993448, "learning_rate": 7.984445111058807e-06, "loss": 18.0208, "step": 17371 }, { "epoch": 0.31754620066901856, "grad_norm": 5.21294512449078, "learning_rate": 7.98420760872404e-06, "loss": 17.0952, "step": 17372 }, { "epoch": 0.3175644798654651, "grad_norm": 6.4250015353459276, "learning_rate": 7.983970095929884e-06, "loss": 17.4149, "step": 17373 }, { "epoch": 0.31758275906191163, "grad_norm": 5.239606804881995, "learning_rate": 7.983732572677172e-06, "loss": 16.8856, "step": 17374 }, { "epoch": 0.31760103825835817, "grad_norm": 7.069788966658681, "learning_rate": 7.983495038966735e-06, "loss": 17.5749, "step": 17375 }, { "epoch": 0.3176193174548047, "grad_norm": 6.574379653563129, "learning_rate": 7.98325749479941e-06, "loss": 17.8001, "step": 17376 }, { "epoch": 0.3176375966512512, "grad_norm": 6.548469582548624, "learning_rate": 7.983019940176024e-06, "loss": 17.4517, "step": 17377 }, { "epoch": 0.3176558758476977, "grad_norm": 5.8811889627432805, "learning_rate": 7.982782375097412e-06, "loss": 17.2188, "step": 17378 }, { "epoch": 0.31767415504414426, "grad_norm": 6.428892726108311, "learning_rate": 7.982544799564407e-06, "loss": 17.5082, "step": 17379 }, { "epoch": 0.3176924342405908, "grad_norm": 9.126127628108254, "learning_rate": 7.98230721357784e-06, "loss": 18.6344, "step": 17380 }, { "epoch": 0.31771071343703733, "grad_norm": 5.123403464589389, "learning_rate": 7.982069617138545e-06, "loss": 16.9785, "step": 17381 }, { "epoch": 0.3177289926334838, "grad_norm": 5.754014486785964, "learning_rate": 7.981832010247358e-06, "loss": 17.3393, "step": 17382 }, { "epoch": 0.31774727182993034, "grad_norm": 6.143688327756162, "learning_rate": 7.981594392905105e-06, "loss": 17.4204, "step": 17383 }, { "epoch": 0.3177655510263769, "grad_norm": 5.7456749514990175, "learning_rate": 7.981356765112624e-06, "loss": 17.1715, "step": 17384 }, { "epoch": 0.3177838302228234, "grad_norm": 7.125977071006484, "learning_rate": 7.981119126870747e-06, "loss": 17.6395, "step": 17385 }, { "epoch": 0.31780210941926995, "grad_norm": 6.775925131844454, "learning_rate": 7.980881478180305e-06, "loss": 17.5778, "step": 17386 }, { "epoch": 0.31782038861571643, "grad_norm": 5.346890123465364, "learning_rate": 7.980643819042132e-06, "loss": 16.9998, "step": 17387 }, { "epoch": 0.31783866781216297, "grad_norm": 6.44156051332979, "learning_rate": 7.980406149457062e-06, "loss": 17.615, "step": 17388 }, { "epoch": 0.3178569470086095, "grad_norm": 6.542473041592614, "learning_rate": 7.980168469425926e-06, "loss": 17.9314, "step": 17389 }, { "epoch": 0.31787522620505604, "grad_norm": 7.14022172765695, "learning_rate": 7.979930778949559e-06, "loss": 17.8826, "step": 17390 }, { "epoch": 0.3178935054015026, "grad_norm": 6.3171456407064035, "learning_rate": 7.979693078028792e-06, "loss": 17.7095, "step": 17391 }, { "epoch": 0.31791178459794905, "grad_norm": 7.237654991147253, "learning_rate": 7.979455366664461e-06, "loss": 17.7842, "step": 17392 }, { "epoch": 0.3179300637943956, "grad_norm": 6.316166156805975, "learning_rate": 7.979217644857395e-06, "loss": 17.4043, "step": 17393 }, { "epoch": 0.3179483429908421, "grad_norm": 6.222195360899406, "learning_rate": 7.978979912608432e-06, "loss": 17.3792, "step": 17394 }, { "epoch": 0.31796662218728866, "grad_norm": 7.086724627567828, "learning_rate": 7.978742169918403e-06, "loss": 17.4918, "step": 17395 }, { "epoch": 0.3179849013837352, "grad_norm": 6.821847337797812, "learning_rate": 7.97850441678814e-06, "loss": 17.6306, "step": 17396 }, { "epoch": 0.3180031805801817, "grad_norm": 6.894452783898032, "learning_rate": 7.978266653218478e-06, "loss": 17.6678, "step": 17397 }, { "epoch": 0.3180214597766282, "grad_norm": 7.5576865526535055, "learning_rate": 7.978028879210249e-06, "loss": 17.8944, "step": 17398 }, { "epoch": 0.31803973897307475, "grad_norm": 6.537276779955941, "learning_rate": 7.977791094764288e-06, "loss": 17.436, "step": 17399 }, { "epoch": 0.3180580181695213, "grad_norm": 5.298225272967506, "learning_rate": 7.977553299881428e-06, "loss": 17.0772, "step": 17400 }, { "epoch": 0.3180762973659678, "grad_norm": 7.239544698165914, "learning_rate": 7.9773154945625e-06, "loss": 17.998, "step": 17401 }, { "epoch": 0.3180945765624143, "grad_norm": 6.289866225021799, "learning_rate": 7.977077678808342e-06, "loss": 17.462, "step": 17402 }, { "epoch": 0.31811285575886084, "grad_norm": 6.5872078956297475, "learning_rate": 7.976839852619785e-06, "loss": 17.5857, "step": 17403 }, { "epoch": 0.31813113495530737, "grad_norm": 7.102906806243845, "learning_rate": 7.976602015997662e-06, "loss": 17.8424, "step": 17404 }, { "epoch": 0.3181494141517539, "grad_norm": 6.1827037640816975, "learning_rate": 7.976364168942807e-06, "loss": 17.3523, "step": 17405 }, { "epoch": 0.3181676933482004, "grad_norm": 7.042763867266987, "learning_rate": 7.976126311456054e-06, "loss": 17.7008, "step": 17406 }, { "epoch": 0.3181859725446469, "grad_norm": 6.073728287871449, "learning_rate": 7.975888443538235e-06, "loss": 17.3641, "step": 17407 }, { "epoch": 0.31820425174109346, "grad_norm": 5.714845719217169, "learning_rate": 7.975650565190187e-06, "loss": 17.4318, "step": 17408 }, { "epoch": 0.31822253093754, "grad_norm": 7.3403681460001025, "learning_rate": 7.975412676412742e-06, "loss": 17.9309, "step": 17409 }, { "epoch": 0.31824081013398653, "grad_norm": 6.26588164655034, "learning_rate": 7.975174777206733e-06, "loss": 17.543, "step": 17410 }, { "epoch": 0.318259089330433, "grad_norm": 5.042416906249586, "learning_rate": 7.974936867572995e-06, "loss": 16.921, "step": 17411 }, { "epoch": 0.31827736852687954, "grad_norm": 5.8848114121785455, "learning_rate": 7.974698947512362e-06, "loss": 17.5731, "step": 17412 }, { "epoch": 0.3182956477233261, "grad_norm": 7.517702316834048, "learning_rate": 7.974461017025667e-06, "loss": 17.9577, "step": 17413 }, { "epoch": 0.3183139269197726, "grad_norm": 5.918598179588317, "learning_rate": 7.974223076113744e-06, "loss": 17.278, "step": 17414 }, { "epoch": 0.31833220611621915, "grad_norm": 5.42916953972551, "learning_rate": 7.973985124777427e-06, "loss": 17.1337, "step": 17415 }, { "epoch": 0.31835048531266563, "grad_norm": 10.107034878062095, "learning_rate": 7.973747163017552e-06, "loss": 18.4012, "step": 17416 }, { "epoch": 0.31836876450911217, "grad_norm": 6.690150743389067, "learning_rate": 7.97350919083495e-06, "loss": 17.5545, "step": 17417 }, { "epoch": 0.3183870437055587, "grad_norm": 6.481417714103426, "learning_rate": 7.973271208230454e-06, "loss": 17.5627, "step": 17418 }, { "epoch": 0.31840532290200524, "grad_norm": 6.523033117753645, "learning_rate": 7.973033215204902e-06, "loss": 17.5143, "step": 17419 }, { "epoch": 0.3184236020984518, "grad_norm": 7.0409480642755655, "learning_rate": 7.972795211759129e-06, "loss": 17.7209, "step": 17420 }, { "epoch": 0.31844188129489825, "grad_norm": 6.998002046816529, "learning_rate": 7.972557197893964e-06, "loss": 17.6533, "step": 17421 }, { "epoch": 0.3184601604913448, "grad_norm": 7.4785771956664435, "learning_rate": 7.972319173610243e-06, "loss": 18.058, "step": 17422 }, { "epoch": 0.3184784396877913, "grad_norm": 6.994765865188953, "learning_rate": 7.972081138908805e-06, "loss": 17.7197, "step": 17423 }, { "epoch": 0.31849671888423786, "grad_norm": 7.442863291258745, "learning_rate": 7.971843093790477e-06, "loss": 17.7491, "step": 17424 }, { "epoch": 0.3185149980806844, "grad_norm": 6.4550573992928895, "learning_rate": 7.971605038256098e-06, "loss": 17.6854, "step": 17425 }, { "epoch": 0.3185332772771309, "grad_norm": 6.770599267949571, "learning_rate": 7.971366972306503e-06, "loss": 17.6583, "step": 17426 }, { "epoch": 0.3185515564735774, "grad_norm": 7.6163586371788154, "learning_rate": 7.971128895942522e-06, "loss": 17.9685, "step": 17427 }, { "epoch": 0.31856983567002395, "grad_norm": 7.577717258632709, "learning_rate": 7.970890809164992e-06, "loss": 18.0588, "step": 17428 }, { "epoch": 0.3185881148664705, "grad_norm": 8.542688583545747, "learning_rate": 7.97065271197475e-06, "loss": 18.0899, "step": 17429 }, { "epoch": 0.318606394062917, "grad_norm": 6.05510887570353, "learning_rate": 7.970414604372627e-06, "loss": 17.2955, "step": 17430 }, { "epoch": 0.3186246732593635, "grad_norm": 6.308549937419515, "learning_rate": 7.970176486359457e-06, "loss": 17.3666, "step": 17431 }, { "epoch": 0.31864295245581004, "grad_norm": 6.004841029169363, "learning_rate": 7.969938357936078e-06, "loss": 17.2362, "step": 17432 }, { "epoch": 0.31866123165225657, "grad_norm": 5.99162523397113, "learning_rate": 7.969700219103323e-06, "loss": 17.3407, "step": 17433 }, { "epoch": 0.3186795108487031, "grad_norm": 6.420638244051939, "learning_rate": 7.969462069862025e-06, "loss": 17.5247, "step": 17434 }, { "epoch": 0.31869779004514964, "grad_norm": 6.14558719914021, "learning_rate": 7.96922391021302e-06, "loss": 17.4104, "step": 17435 }, { "epoch": 0.3187160692415961, "grad_norm": 7.010187718192945, "learning_rate": 7.968985740157144e-06, "loss": 17.8984, "step": 17436 }, { "epoch": 0.31873434843804266, "grad_norm": 7.00973642192062, "learning_rate": 7.968747559695232e-06, "loss": 18.0226, "step": 17437 }, { "epoch": 0.3187526276344892, "grad_norm": 7.942679197149888, "learning_rate": 7.968509368828115e-06, "loss": 17.8861, "step": 17438 }, { "epoch": 0.31877090683093573, "grad_norm": 6.691062041059063, "learning_rate": 7.968271167556629e-06, "loss": 17.5306, "step": 17439 }, { "epoch": 0.3187891860273822, "grad_norm": 7.305086467569272, "learning_rate": 7.968032955881612e-06, "loss": 17.8021, "step": 17440 }, { "epoch": 0.31880746522382875, "grad_norm": 7.282716776920097, "learning_rate": 7.967794733803899e-06, "loss": 17.9196, "step": 17441 }, { "epoch": 0.3188257444202753, "grad_norm": 6.124799570346934, "learning_rate": 7.96755650132432e-06, "loss": 17.5765, "step": 17442 }, { "epoch": 0.3188440236167218, "grad_norm": 6.351901912431719, "learning_rate": 7.967318258443715e-06, "loss": 17.266, "step": 17443 }, { "epoch": 0.31886230281316835, "grad_norm": 7.462993494413, "learning_rate": 7.967080005162915e-06, "loss": 17.7182, "step": 17444 }, { "epoch": 0.31888058200961483, "grad_norm": 5.1388979891743425, "learning_rate": 7.966841741482757e-06, "loss": 17.0286, "step": 17445 }, { "epoch": 0.31889886120606137, "grad_norm": 7.108736057028803, "learning_rate": 7.966603467404079e-06, "loss": 17.679, "step": 17446 }, { "epoch": 0.3189171404025079, "grad_norm": 6.703201495150327, "learning_rate": 7.966365182927712e-06, "loss": 17.7453, "step": 17447 }, { "epoch": 0.31893541959895444, "grad_norm": 5.855150371783629, "learning_rate": 7.966126888054491e-06, "loss": 17.398, "step": 17448 }, { "epoch": 0.318953698795401, "grad_norm": 6.85273940483492, "learning_rate": 7.965888582785254e-06, "loss": 17.8339, "step": 17449 }, { "epoch": 0.31897197799184746, "grad_norm": 6.410416172121891, "learning_rate": 7.965650267120834e-06, "loss": 17.4644, "step": 17450 }, { "epoch": 0.318990257188294, "grad_norm": 7.057073891279669, "learning_rate": 7.96541194106207e-06, "loss": 17.5452, "step": 17451 }, { "epoch": 0.3190085363847405, "grad_norm": 7.045457363541464, "learning_rate": 7.96517360460979e-06, "loss": 17.6048, "step": 17452 }, { "epoch": 0.31902681558118706, "grad_norm": 6.154527110652669, "learning_rate": 7.964935257764836e-06, "loss": 17.3925, "step": 17453 }, { "epoch": 0.3190450947776336, "grad_norm": 7.464159396080561, "learning_rate": 7.964696900528042e-06, "loss": 18.3016, "step": 17454 }, { "epoch": 0.3190633739740801, "grad_norm": 5.749208152804058, "learning_rate": 7.964458532900242e-06, "loss": 17.3589, "step": 17455 }, { "epoch": 0.3190816531705266, "grad_norm": 8.00658019945167, "learning_rate": 7.96422015488227e-06, "loss": 18.218, "step": 17456 }, { "epoch": 0.31909993236697315, "grad_norm": 8.344669067885857, "learning_rate": 7.963981766474966e-06, "loss": 18.2281, "step": 17457 }, { "epoch": 0.3191182115634197, "grad_norm": 7.415546398600031, "learning_rate": 7.963743367679163e-06, "loss": 17.9363, "step": 17458 }, { "epoch": 0.3191364907598662, "grad_norm": 7.5905559663552005, "learning_rate": 7.963504958495695e-06, "loss": 17.9948, "step": 17459 }, { "epoch": 0.3191547699563127, "grad_norm": 7.352105628538097, "learning_rate": 7.963266538925401e-06, "loss": 18.1612, "step": 17460 }, { "epoch": 0.31917304915275924, "grad_norm": 6.36190368461596, "learning_rate": 7.963028108969115e-06, "loss": 17.6291, "step": 17461 }, { "epoch": 0.31919132834920577, "grad_norm": 6.576523290862874, "learning_rate": 7.962789668627672e-06, "loss": 17.451, "step": 17462 }, { "epoch": 0.3192096075456523, "grad_norm": 8.733940482155006, "learning_rate": 7.962551217901909e-06, "loss": 17.718, "step": 17463 }, { "epoch": 0.31922788674209884, "grad_norm": 7.95138733543129, "learning_rate": 7.962312756792659e-06, "loss": 18.3766, "step": 17464 }, { "epoch": 0.3192461659385453, "grad_norm": 7.864199906412713, "learning_rate": 7.962074285300763e-06, "loss": 18.3167, "step": 17465 }, { "epoch": 0.31926444513499186, "grad_norm": 6.142260208460528, "learning_rate": 7.96183580342705e-06, "loss": 17.4108, "step": 17466 }, { "epoch": 0.3192827243314384, "grad_norm": 7.183732960893861, "learning_rate": 7.961597311172361e-06, "loss": 17.479, "step": 17467 }, { "epoch": 0.31930100352788493, "grad_norm": 7.74294349359482, "learning_rate": 7.96135880853753e-06, "loss": 18.2792, "step": 17468 }, { "epoch": 0.31931928272433147, "grad_norm": 6.047273233634122, "learning_rate": 7.961120295523397e-06, "loss": 17.4092, "step": 17469 }, { "epoch": 0.31933756192077795, "grad_norm": 4.820243801648722, "learning_rate": 7.960881772130791e-06, "loss": 16.9432, "step": 17470 }, { "epoch": 0.3193558411172245, "grad_norm": 7.203332010453183, "learning_rate": 7.960643238360552e-06, "loss": 17.6185, "step": 17471 }, { "epoch": 0.319374120313671, "grad_norm": 5.7546710407216874, "learning_rate": 7.960404694213514e-06, "loss": 17.4795, "step": 17472 }, { "epoch": 0.31939239951011755, "grad_norm": 6.916722004121082, "learning_rate": 7.960166139690516e-06, "loss": 17.6398, "step": 17473 }, { "epoch": 0.31941067870656403, "grad_norm": 5.269629728558581, "learning_rate": 7.959927574792393e-06, "loss": 17.0616, "step": 17474 }, { "epoch": 0.31942895790301057, "grad_norm": 7.214591716120327, "learning_rate": 7.959688999519979e-06, "loss": 17.8556, "step": 17475 }, { "epoch": 0.3194472370994571, "grad_norm": 5.938926725577914, "learning_rate": 7.959450413874112e-06, "loss": 17.4313, "step": 17476 }, { "epoch": 0.31946551629590364, "grad_norm": 6.393551342787462, "learning_rate": 7.95921181785563e-06, "loss": 17.747, "step": 17477 }, { "epoch": 0.3194837954923502, "grad_norm": 6.970582896186193, "learning_rate": 7.958973211465366e-06, "loss": 17.4394, "step": 17478 }, { "epoch": 0.31950207468879666, "grad_norm": 6.540716048527766, "learning_rate": 7.958734594704158e-06, "loss": 17.6077, "step": 17479 }, { "epoch": 0.3195203538852432, "grad_norm": 6.393892115170374, "learning_rate": 7.958495967572842e-06, "loss": 17.7184, "step": 17480 }, { "epoch": 0.3195386330816897, "grad_norm": 6.378799462589449, "learning_rate": 7.958257330072255e-06, "loss": 17.5559, "step": 17481 }, { "epoch": 0.31955691227813626, "grad_norm": 6.667832837994266, "learning_rate": 7.95801868220323e-06, "loss": 17.6016, "step": 17482 }, { "epoch": 0.3195751914745828, "grad_norm": 6.471674145106354, "learning_rate": 7.95778002396661e-06, "loss": 17.5118, "step": 17483 }, { "epoch": 0.3195934706710293, "grad_norm": 6.955999185552674, "learning_rate": 7.957541355363225e-06, "loss": 17.8465, "step": 17484 }, { "epoch": 0.3196117498674758, "grad_norm": 8.602094459095065, "learning_rate": 7.957302676393916e-06, "loss": 18.5077, "step": 17485 }, { "epoch": 0.31963002906392235, "grad_norm": 7.357329600585394, "learning_rate": 7.957063987059517e-06, "loss": 17.6965, "step": 17486 }, { "epoch": 0.3196483082603689, "grad_norm": 6.773284659372345, "learning_rate": 7.956825287360864e-06, "loss": 17.6129, "step": 17487 }, { "epoch": 0.3196665874568154, "grad_norm": 7.487286498452414, "learning_rate": 7.956586577298798e-06, "loss": 18.1413, "step": 17488 }, { "epoch": 0.3196848666532619, "grad_norm": 7.478385783668679, "learning_rate": 7.95634785687415e-06, "loss": 17.8402, "step": 17489 }, { "epoch": 0.31970314584970844, "grad_norm": 6.650266303301079, "learning_rate": 7.956109126087759e-06, "loss": 17.7921, "step": 17490 }, { "epoch": 0.319721425046155, "grad_norm": 8.307400356261565, "learning_rate": 7.955870384940463e-06, "loss": 18.2952, "step": 17491 }, { "epoch": 0.3197397042426015, "grad_norm": 5.796790357817366, "learning_rate": 7.955631633433099e-06, "loss": 17.3616, "step": 17492 }, { "epoch": 0.31975798343904804, "grad_norm": 7.4729494518302815, "learning_rate": 7.955392871566501e-06, "loss": 18.1274, "step": 17493 }, { "epoch": 0.3197762626354945, "grad_norm": 7.555378140360098, "learning_rate": 7.955154099341509e-06, "loss": 17.7562, "step": 17494 }, { "epoch": 0.31979454183194106, "grad_norm": 5.510249169967757, "learning_rate": 7.954915316758955e-06, "loss": 17.403, "step": 17495 }, { "epoch": 0.3198128210283876, "grad_norm": 7.153243771245886, "learning_rate": 7.954676523819682e-06, "loss": 17.3068, "step": 17496 }, { "epoch": 0.31983110022483413, "grad_norm": 6.570997957736306, "learning_rate": 7.954437720524524e-06, "loss": 17.472, "step": 17497 }, { "epoch": 0.31984937942128067, "grad_norm": 7.388369987731288, "learning_rate": 7.954198906874318e-06, "loss": 17.7279, "step": 17498 }, { "epoch": 0.31986765861772715, "grad_norm": 5.970687406083828, "learning_rate": 7.953960082869901e-06, "loss": 17.2817, "step": 17499 }, { "epoch": 0.3198859378141737, "grad_norm": 5.885900804608633, "learning_rate": 7.95372124851211e-06, "loss": 17.4165, "step": 17500 }, { "epoch": 0.3199042170106202, "grad_norm": 5.55017902287351, "learning_rate": 7.953482403801782e-06, "loss": 17.2828, "step": 17501 }, { "epoch": 0.31992249620706675, "grad_norm": 8.790762996276861, "learning_rate": 7.953243548739756e-06, "loss": 18.4818, "step": 17502 }, { "epoch": 0.3199407754035133, "grad_norm": 6.927184560188266, "learning_rate": 7.953004683326867e-06, "loss": 17.8649, "step": 17503 }, { "epoch": 0.31995905459995977, "grad_norm": 5.5768742281237955, "learning_rate": 7.952765807563952e-06, "loss": 17.3118, "step": 17504 }, { "epoch": 0.3199773337964063, "grad_norm": 6.95477515024145, "learning_rate": 7.952526921451849e-06, "loss": 18.0654, "step": 17505 }, { "epoch": 0.31999561299285284, "grad_norm": 6.230271429962698, "learning_rate": 7.952288024991398e-06, "loss": 17.417, "step": 17506 }, { "epoch": 0.3200138921892994, "grad_norm": 6.747843091399649, "learning_rate": 7.952049118183429e-06, "loss": 17.6285, "step": 17507 }, { "epoch": 0.32003217138574586, "grad_norm": 7.916312733795419, "learning_rate": 7.951810201028787e-06, "loss": 18.3359, "step": 17508 }, { "epoch": 0.3200504505821924, "grad_norm": 6.5808985123475665, "learning_rate": 7.951571273528307e-06, "loss": 17.5606, "step": 17509 }, { "epoch": 0.3200687297786389, "grad_norm": 6.519203520268625, "learning_rate": 7.951332335682823e-06, "loss": 17.5328, "step": 17510 }, { "epoch": 0.32008700897508546, "grad_norm": 5.964109407732359, "learning_rate": 7.951093387493179e-06, "loss": 17.5441, "step": 17511 }, { "epoch": 0.320105288171532, "grad_norm": 7.177621814938111, "learning_rate": 7.950854428960207e-06, "loss": 17.819, "step": 17512 }, { "epoch": 0.3201235673679785, "grad_norm": 6.044403665427599, "learning_rate": 7.950615460084745e-06, "loss": 17.5882, "step": 17513 }, { "epoch": 0.320141846564425, "grad_norm": 7.707237807010002, "learning_rate": 7.950376480867633e-06, "loss": 17.7754, "step": 17514 }, { "epoch": 0.32016012576087155, "grad_norm": 6.508442052553958, "learning_rate": 7.950137491309708e-06, "loss": 17.3819, "step": 17515 }, { "epoch": 0.3201784049573181, "grad_norm": 6.812862237743727, "learning_rate": 7.949898491411807e-06, "loss": 17.7334, "step": 17516 }, { "epoch": 0.3201966841537646, "grad_norm": 5.788706744289365, "learning_rate": 7.949659481174768e-06, "loss": 17.2861, "step": 17517 }, { "epoch": 0.3202149633502111, "grad_norm": 6.495981215022219, "learning_rate": 7.949420460599425e-06, "loss": 17.8605, "step": 17518 }, { "epoch": 0.32023324254665764, "grad_norm": 7.151300394292834, "learning_rate": 7.949181429686624e-06, "loss": 18.113, "step": 17519 }, { "epoch": 0.3202515217431042, "grad_norm": 5.538760547001581, "learning_rate": 7.948942388437195e-06, "loss": 17.3418, "step": 17520 }, { "epoch": 0.3202698009395507, "grad_norm": 6.04893879279129, "learning_rate": 7.94870333685198e-06, "loss": 17.2436, "step": 17521 }, { "epoch": 0.32028808013599724, "grad_norm": 6.735375023901454, "learning_rate": 7.948464274931816e-06, "loss": 18.0333, "step": 17522 }, { "epoch": 0.3203063593324437, "grad_norm": 6.345674222165929, "learning_rate": 7.94822520267754e-06, "loss": 17.7165, "step": 17523 }, { "epoch": 0.32032463852889026, "grad_norm": 6.817369136315513, "learning_rate": 7.94798612008999e-06, "loss": 17.7406, "step": 17524 }, { "epoch": 0.3203429177253368, "grad_norm": 5.850563317955968, "learning_rate": 7.947747027170005e-06, "loss": 17.3355, "step": 17525 }, { "epoch": 0.32036119692178333, "grad_norm": 6.867214062864711, "learning_rate": 7.947507923918423e-06, "loss": 17.3336, "step": 17526 }, { "epoch": 0.32037947611822987, "grad_norm": 6.268486565383152, "learning_rate": 7.94726881033608e-06, "loss": 17.3672, "step": 17527 }, { "epoch": 0.32039775531467635, "grad_norm": 6.667181512188055, "learning_rate": 7.947029686423818e-06, "loss": 17.8645, "step": 17528 }, { "epoch": 0.3204160345111229, "grad_norm": 7.25862817451171, "learning_rate": 7.94679055218247e-06, "loss": 18.0934, "step": 17529 }, { "epoch": 0.3204343137075694, "grad_norm": 6.6850707239734835, "learning_rate": 7.94655140761288e-06, "loss": 17.288, "step": 17530 }, { "epoch": 0.32045259290401595, "grad_norm": 7.096240594452438, "learning_rate": 7.94631225271588e-06, "loss": 18.0013, "step": 17531 }, { "epoch": 0.3204708721004625, "grad_norm": 6.55634688373912, "learning_rate": 7.946073087492311e-06, "loss": 17.6222, "step": 17532 }, { "epoch": 0.32048915129690897, "grad_norm": 6.717447999377284, "learning_rate": 7.945833911943013e-06, "loss": 17.8644, "step": 17533 }, { "epoch": 0.3205074304933555, "grad_norm": 6.8972307919127305, "learning_rate": 7.945594726068823e-06, "loss": 17.9629, "step": 17534 }, { "epoch": 0.32052570968980204, "grad_norm": 6.469995544451611, "learning_rate": 7.945355529870578e-06, "loss": 17.4238, "step": 17535 }, { "epoch": 0.3205439888862486, "grad_norm": 5.857223663272877, "learning_rate": 7.945116323349119e-06, "loss": 17.3366, "step": 17536 }, { "epoch": 0.3205622680826951, "grad_norm": 7.036488914584075, "learning_rate": 7.944877106505282e-06, "loss": 17.7789, "step": 17537 }, { "epoch": 0.3205805472791416, "grad_norm": 6.284735969025044, "learning_rate": 7.944637879339907e-06, "loss": 17.4463, "step": 17538 }, { "epoch": 0.32059882647558813, "grad_norm": 5.871437555277933, "learning_rate": 7.94439864185383e-06, "loss": 17.4446, "step": 17539 }, { "epoch": 0.32061710567203466, "grad_norm": 6.1560330642973335, "learning_rate": 7.944159394047893e-06, "loss": 17.5608, "step": 17540 }, { "epoch": 0.3206353848684812, "grad_norm": 5.920449784092316, "learning_rate": 7.943920135922932e-06, "loss": 17.4074, "step": 17541 }, { "epoch": 0.3206536640649277, "grad_norm": 6.87684239240657, "learning_rate": 7.943680867479786e-06, "loss": 17.8309, "step": 17542 }, { "epoch": 0.3206719432613742, "grad_norm": 6.122805539581964, "learning_rate": 7.943441588719294e-06, "loss": 17.3828, "step": 17543 }, { "epoch": 0.32069022245782075, "grad_norm": 8.136131342916752, "learning_rate": 7.943202299642297e-06, "loss": 18.3353, "step": 17544 }, { "epoch": 0.3207085016542673, "grad_norm": 8.59593021747093, "learning_rate": 7.942963000249628e-06, "loss": 18.373, "step": 17545 }, { "epoch": 0.3207267808507138, "grad_norm": 6.971687487346667, "learning_rate": 7.94272369054213e-06, "loss": 17.6022, "step": 17546 }, { "epoch": 0.3207450600471603, "grad_norm": 5.31227976173666, "learning_rate": 7.942484370520643e-06, "loss": 17.1144, "step": 17547 }, { "epoch": 0.32076333924360684, "grad_norm": 6.780034685837173, "learning_rate": 7.942245040186e-06, "loss": 17.7526, "step": 17548 }, { "epoch": 0.3207816184400534, "grad_norm": 5.263989698779791, "learning_rate": 7.942005699539046e-06, "loss": 17.1315, "step": 17549 }, { "epoch": 0.3207998976364999, "grad_norm": 7.301086640136754, "learning_rate": 7.941766348580617e-06, "loss": 17.9126, "step": 17550 }, { "epoch": 0.32081817683294644, "grad_norm": 6.426251101219799, "learning_rate": 7.941526987311552e-06, "loss": 17.6605, "step": 17551 }, { "epoch": 0.3208364560293929, "grad_norm": 6.434784824376673, "learning_rate": 7.941287615732689e-06, "loss": 17.6406, "step": 17552 }, { "epoch": 0.32085473522583946, "grad_norm": 7.409805954713263, "learning_rate": 7.94104823384487e-06, "loss": 18.2674, "step": 17553 }, { "epoch": 0.320873014422286, "grad_norm": 5.58741255493611, "learning_rate": 7.940808841648932e-06, "loss": 17.3585, "step": 17554 }, { "epoch": 0.32089129361873253, "grad_norm": 7.277771121319455, "learning_rate": 7.940569439145714e-06, "loss": 18.0897, "step": 17555 }, { "epoch": 0.32090957281517907, "grad_norm": 6.72962489040817, "learning_rate": 7.940330026336055e-06, "loss": 17.694, "step": 17556 }, { "epoch": 0.32092785201162555, "grad_norm": 5.180622443703075, "learning_rate": 7.940090603220793e-06, "loss": 16.8818, "step": 17557 }, { "epoch": 0.3209461312080721, "grad_norm": 5.899524899822945, "learning_rate": 7.93985116980077e-06, "loss": 17.3021, "step": 17558 }, { "epoch": 0.3209644104045186, "grad_norm": 6.69713386327496, "learning_rate": 7.939611726076823e-06, "loss": 17.6658, "step": 17559 }, { "epoch": 0.32098268960096515, "grad_norm": 7.072623495168213, "learning_rate": 7.939372272049792e-06, "loss": 17.8161, "step": 17560 }, { "epoch": 0.3210009687974117, "grad_norm": 6.802736690860457, "learning_rate": 7.939132807720518e-06, "loss": 17.676, "step": 17561 }, { "epoch": 0.32101924799385817, "grad_norm": 4.864000088094914, "learning_rate": 7.938893333089837e-06, "loss": 16.9136, "step": 17562 }, { "epoch": 0.3210375271903047, "grad_norm": 5.849446161457291, "learning_rate": 7.93865384815859e-06, "loss": 17.5182, "step": 17563 }, { "epoch": 0.32105580638675124, "grad_norm": 6.494365172480418, "learning_rate": 7.938414352927618e-06, "loss": 17.6016, "step": 17564 }, { "epoch": 0.3210740855831978, "grad_norm": 6.639194863257415, "learning_rate": 7.938174847397758e-06, "loss": 17.6989, "step": 17565 }, { "epoch": 0.3210923647796443, "grad_norm": 7.944189344042548, "learning_rate": 7.937935331569848e-06, "loss": 18.2166, "step": 17566 }, { "epoch": 0.3211106439760908, "grad_norm": 7.286882368237209, "learning_rate": 7.93769580544473e-06, "loss": 18.2911, "step": 17567 }, { "epoch": 0.32112892317253733, "grad_norm": 6.926221020780543, "learning_rate": 7.937456269023245e-06, "loss": 17.6126, "step": 17568 }, { "epoch": 0.32114720236898386, "grad_norm": 6.692531238903858, "learning_rate": 7.93721672230623e-06, "loss": 17.605, "step": 17569 }, { "epoch": 0.3211654815654304, "grad_norm": 5.020600207226189, "learning_rate": 7.936977165294525e-06, "loss": 16.8796, "step": 17570 }, { "epoch": 0.32118376076187694, "grad_norm": 6.703917254732207, "learning_rate": 7.93673759798897e-06, "loss": 17.6535, "step": 17571 }, { "epoch": 0.3212020399583234, "grad_norm": 6.488071635110896, "learning_rate": 7.936498020390404e-06, "loss": 17.7118, "step": 17572 }, { "epoch": 0.32122031915476995, "grad_norm": 7.154042169177971, "learning_rate": 7.936258432499669e-06, "loss": 17.6759, "step": 17573 }, { "epoch": 0.3212385983512165, "grad_norm": 8.49823162600307, "learning_rate": 7.9360188343176e-06, "loss": 18.0933, "step": 17574 }, { "epoch": 0.321256877547663, "grad_norm": 8.807821376726649, "learning_rate": 7.935779225845042e-06, "loss": 18.4572, "step": 17575 }, { "epoch": 0.3212751567441095, "grad_norm": 6.9730719662996, "learning_rate": 7.935539607082832e-06, "loss": 17.8311, "step": 17576 }, { "epoch": 0.32129343594055604, "grad_norm": 6.399781052021891, "learning_rate": 7.935299978031811e-06, "loss": 17.7642, "step": 17577 }, { "epoch": 0.3213117151370026, "grad_norm": 6.923258449578971, "learning_rate": 7.935060338692817e-06, "loss": 17.9463, "step": 17578 }, { "epoch": 0.3213299943334491, "grad_norm": 5.661210167097001, "learning_rate": 7.934820689066693e-06, "loss": 17.1465, "step": 17579 }, { "epoch": 0.32134827352989564, "grad_norm": 6.156655129376134, "learning_rate": 7.934581029154276e-06, "loss": 17.6847, "step": 17580 }, { "epoch": 0.3213665527263421, "grad_norm": 6.773443607982263, "learning_rate": 7.934341358956409e-06, "loss": 17.7063, "step": 17581 }, { "epoch": 0.32138483192278866, "grad_norm": 6.826103831707497, "learning_rate": 7.934101678473926e-06, "loss": 17.7705, "step": 17582 }, { "epoch": 0.3214031111192352, "grad_norm": 5.698977585287464, "learning_rate": 7.933861987707675e-06, "loss": 17.18, "step": 17583 }, { "epoch": 0.32142139031568173, "grad_norm": 6.760545354895173, "learning_rate": 7.93362228665849e-06, "loss": 17.9606, "step": 17584 }, { "epoch": 0.32143966951212827, "grad_norm": 6.4923707747208566, "learning_rate": 7.933382575327216e-06, "loss": 17.5494, "step": 17585 }, { "epoch": 0.32145794870857475, "grad_norm": 5.761927297438746, "learning_rate": 7.933142853714689e-06, "loss": 17.4916, "step": 17586 }, { "epoch": 0.3214762279050213, "grad_norm": 7.406766303230393, "learning_rate": 7.932903121821749e-06, "loss": 17.8947, "step": 17587 }, { "epoch": 0.3214945071014678, "grad_norm": 5.591415923018626, "learning_rate": 7.93266337964924e-06, "loss": 17.3074, "step": 17588 }, { "epoch": 0.32151278629791435, "grad_norm": 6.4557917353124585, "learning_rate": 7.932423627198e-06, "loss": 17.693, "step": 17589 }, { "epoch": 0.3215310654943609, "grad_norm": 5.755025346279173, "learning_rate": 7.932183864468872e-06, "loss": 17.3218, "step": 17590 }, { "epoch": 0.32154934469080737, "grad_norm": 6.200545828607429, "learning_rate": 7.93194409146269e-06, "loss": 17.5762, "step": 17591 }, { "epoch": 0.3215676238872539, "grad_norm": 5.663094746456993, "learning_rate": 7.931704308180302e-06, "loss": 17.2843, "step": 17592 }, { "epoch": 0.32158590308370044, "grad_norm": 6.467995005492069, "learning_rate": 7.931464514622543e-06, "loss": 17.4465, "step": 17593 }, { "epoch": 0.321604182280147, "grad_norm": 5.806689306895789, "learning_rate": 7.931224710790256e-06, "loss": 17.2962, "step": 17594 }, { "epoch": 0.3216224614765935, "grad_norm": 6.154533597171496, "learning_rate": 7.93098489668428e-06, "loss": 17.3486, "step": 17595 }, { "epoch": 0.32164074067304, "grad_norm": 6.699972068362086, "learning_rate": 7.930745072305455e-06, "loss": 17.5267, "step": 17596 }, { "epoch": 0.32165901986948653, "grad_norm": 7.016208914675771, "learning_rate": 7.930505237654624e-06, "loss": 17.5877, "step": 17597 }, { "epoch": 0.32167729906593306, "grad_norm": 6.944053439932251, "learning_rate": 7.930265392732627e-06, "loss": 17.6566, "step": 17598 }, { "epoch": 0.3216955782623796, "grad_norm": 6.254643436663572, "learning_rate": 7.930025537540304e-06, "loss": 17.6416, "step": 17599 }, { "epoch": 0.32171385745882614, "grad_norm": 6.85258363988787, "learning_rate": 7.929785672078496e-06, "loss": 17.6077, "step": 17600 }, { "epoch": 0.3217321366552726, "grad_norm": 6.205739028806184, "learning_rate": 7.929545796348041e-06, "loss": 17.5122, "step": 17601 }, { "epoch": 0.32175041585171915, "grad_norm": 6.310597843555779, "learning_rate": 7.929305910349786e-06, "loss": 17.2942, "step": 17602 }, { "epoch": 0.3217686950481657, "grad_norm": 7.499436544521757, "learning_rate": 7.929066014084566e-06, "loss": 17.749, "step": 17603 }, { "epoch": 0.3217869742446122, "grad_norm": 6.284919539679724, "learning_rate": 7.928826107553224e-06, "loss": 17.513, "step": 17604 }, { "epoch": 0.32180525344105876, "grad_norm": 7.584473562317276, "learning_rate": 7.9285861907566e-06, "loss": 17.8977, "step": 17605 }, { "epoch": 0.32182353263750524, "grad_norm": 7.055192555998346, "learning_rate": 7.928346263695537e-06, "loss": 17.7729, "step": 17606 }, { "epoch": 0.3218418118339518, "grad_norm": 8.069726630065611, "learning_rate": 7.928106326370872e-06, "loss": 18.356, "step": 17607 }, { "epoch": 0.3218600910303983, "grad_norm": 5.606543136716627, "learning_rate": 7.92786637878345e-06, "loss": 17.06, "step": 17608 }, { "epoch": 0.32187837022684485, "grad_norm": 7.076976047593216, "learning_rate": 7.927626420934112e-06, "loss": 17.3924, "step": 17609 }, { "epoch": 0.3218966494232913, "grad_norm": 6.147490011363302, "learning_rate": 7.927386452823695e-06, "loss": 17.3598, "step": 17610 }, { "epoch": 0.32191492861973786, "grad_norm": 6.665162055800956, "learning_rate": 7.927146474453042e-06, "loss": 17.9429, "step": 17611 }, { "epoch": 0.3219332078161844, "grad_norm": 5.36051715475057, "learning_rate": 7.926906485822998e-06, "loss": 17.1047, "step": 17612 }, { "epoch": 0.32195148701263093, "grad_norm": 11.761879936366626, "learning_rate": 7.926666486934398e-06, "loss": 18.6834, "step": 17613 }, { "epoch": 0.32196976620907747, "grad_norm": 5.695281167138567, "learning_rate": 7.926426477788087e-06, "loss": 17.3025, "step": 17614 }, { "epoch": 0.32198804540552395, "grad_norm": 5.154080125994498, "learning_rate": 7.926186458384904e-06, "loss": 17.0674, "step": 17615 }, { "epoch": 0.3220063246019705, "grad_norm": 6.038050596825892, "learning_rate": 7.925946428725693e-06, "loss": 17.3455, "step": 17616 }, { "epoch": 0.322024603798417, "grad_norm": 5.41747520107593, "learning_rate": 7.925706388811293e-06, "loss": 17.1618, "step": 17617 }, { "epoch": 0.32204288299486356, "grad_norm": 5.824629910042442, "learning_rate": 7.925466338642545e-06, "loss": 17.137, "step": 17618 }, { "epoch": 0.3220611621913101, "grad_norm": 6.258364118403654, "learning_rate": 7.925226278220292e-06, "loss": 17.6541, "step": 17619 }, { "epoch": 0.32207944138775657, "grad_norm": 6.918020430505713, "learning_rate": 7.924986207545376e-06, "loss": 17.6106, "step": 17620 }, { "epoch": 0.3220977205842031, "grad_norm": 5.210507486396455, "learning_rate": 7.924746126618635e-06, "loss": 17.0473, "step": 17621 }, { "epoch": 0.32211599978064964, "grad_norm": 8.335482754109005, "learning_rate": 7.924506035440914e-06, "loss": 18.4042, "step": 17622 }, { "epoch": 0.3221342789770962, "grad_norm": 5.207931457502078, "learning_rate": 7.924265934013054e-06, "loss": 16.9483, "step": 17623 }, { "epoch": 0.3221525581735427, "grad_norm": 8.92538790907973, "learning_rate": 7.924025822335895e-06, "loss": 18.117, "step": 17624 }, { "epoch": 0.3221708373699892, "grad_norm": 6.976151972593718, "learning_rate": 7.923785700410276e-06, "loss": 18.046, "step": 17625 }, { "epoch": 0.32218911656643573, "grad_norm": 7.820570273505529, "learning_rate": 7.923545568237046e-06, "loss": 17.9882, "step": 17626 }, { "epoch": 0.32220739576288226, "grad_norm": 6.724563730350931, "learning_rate": 7.92330542581704e-06, "loss": 17.5323, "step": 17627 }, { "epoch": 0.3222256749593288, "grad_norm": 6.692164008823655, "learning_rate": 7.923065273151103e-06, "loss": 17.6357, "step": 17628 }, { "epoch": 0.32224395415577534, "grad_norm": 5.977387225591005, "learning_rate": 7.922825110240078e-06, "loss": 17.2419, "step": 17629 }, { "epoch": 0.3222622333522218, "grad_norm": 6.459570328727786, "learning_rate": 7.922584937084802e-06, "loss": 17.4161, "step": 17630 }, { "epoch": 0.32228051254866835, "grad_norm": 6.683616062881609, "learning_rate": 7.922344753686119e-06, "loss": 17.7386, "step": 17631 }, { "epoch": 0.3222987917451149, "grad_norm": 5.1633507876631, "learning_rate": 7.922104560044872e-06, "loss": 17.0346, "step": 17632 }, { "epoch": 0.3223170709415614, "grad_norm": 6.779004434736978, "learning_rate": 7.921864356161904e-06, "loss": 17.5749, "step": 17633 }, { "epoch": 0.32233535013800796, "grad_norm": 6.6448000556715705, "learning_rate": 7.921624142038053e-06, "loss": 17.4547, "step": 17634 }, { "epoch": 0.32235362933445444, "grad_norm": 6.9512427461943025, "learning_rate": 7.921383917674164e-06, "loss": 17.5671, "step": 17635 }, { "epoch": 0.322371908530901, "grad_norm": 7.072902040991003, "learning_rate": 7.921143683071076e-06, "loss": 17.8704, "step": 17636 }, { "epoch": 0.3223901877273475, "grad_norm": 6.973465940446415, "learning_rate": 7.920903438229635e-06, "loss": 17.8397, "step": 17637 }, { "epoch": 0.32240846692379405, "grad_norm": 5.468724935219691, "learning_rate": 7.920663183150679e-06, "loss": 17.2447, "step": 17638 }, { "epoch": 0.3224267461202406, "grad_norm": 8.139759119222475, "learning_rate": 7.920422917835054e-06, "loss": 18.1, "step": 17639 }, { "epoch": 0.32244502531668706, "grad_norm": 5.998672892071717, "learning_rate": 7.920182642283598e-06, "loss": 17.4788, "step": 17640 }, { "epoch": 0.3224633045131336, "grad_norm": 5.916220502412665, "learning_rate": 7.919942356497157e-06, "loss": 17.305, "step": 17641 }, { "epoch": 0.32248158370958013, "grad_norm": 6.748695405789091, "learning_rate": 7.91970206047657e-06, "loss": 17.831, "step": 17642 }, { "epoch": 0.32249986290602667, "grad_norm": 6.75706245539897, "learning_rate": 7.91946175422268e-06, "loss": 17.7215, "step": 17643 }, { "epoch": 0.32251814210247315, "grad_norm": 5.84519637319865, "learning_rate": 7.919221437736333e-06, "loss": 17.45, "step": 17644 }, { "epoch": 0.3225364212989197, "grad_norm": 5.426434874888759, "learning_rate": 7.918981111018365e-06, "loss": 17.0958, "step": 17645 }, { "epoch": 0.3225547004953662, "grad_norm": 8.686339613866407, "learning_rate": 7.918740774069623e-06, "loss": 18.4381, "step": 17646 }, { "epoch": 0.32257297969181276, "grad_norm": 6.0229604203282525, "learning_rate": 7.91850042689095e-06, "loss": 17.4061, "step": 17647 }, { "epoch": 0.3225912588882593, "grad_norm": 7.045232420341901, "learning_rate": 7.918260069483182e-06, "loss": 17.9396, "step": 17648 }, { "epoch": 0.32260953808470577, "grad_norm": 7.3879756288542335, "learning_rate": 7.918019701847168e-06, "loss": 18.1053, "step": 17649 }, { "epoch": 0.3226278172811523, "grad_norm": 7.417586721580354, "learning_rate": 7.917779323983748e-06, "loss": 18.019, "step": 17650 }, { "epoch": 0.32264609647759884, "grad_norm": 7.810734338045706, "learning_rate": 7.917538935893765e-06, "loss": 18.1668, "step": 17651 }, { "epoch": 0.3226643756740454, "grad_norm": 5.962226528997857, "learning_rate": 7.91729853757806e-06, "loss": 17.1979, "step": 17652 }, { "epoch": 0.3226826548704919, "grad_norm": 6.282755958032387, "learning_rate": 7.917058129037478e-06, "loss": 17.5572, "step": 17653 }, { "epoch": 0.3227009340669384, "grad_norm": 7.2950753549441245, "learning_rate": 7.91681771027286e-06, "loss": 17.8905, "step": 17654 }, { "epoch": 0.32271921326338493, "grad_norm": 6.718396391577625, "learning_rate": 7.916577281285048e-06, "loss": 17.5792, "step": 17655 }, { "epoch": 0.32273749245983147, "grad_norm": 5.407450209892771, "learning_rate": 7.916336842074888e-06, "loss": 17.3437, "step": 17656 }, { "epoch": 0.322755771656278, "grad_norm": 5.827207139776879, "learning_rate": 7.916096392643218e-06, "loss": 17.417, "step": 17657 }, { "epoch": 0.32277405085272454, "grad_norm": 6.742161879018509, "learning_rate": 7.915855932990884e-06, "loss": 17.5526, "step": 17658 }, { "epoch": 0.322792330049171, "grad_norm": 6.818318909808149, "learning_rate": 7.915615463118729e-06, "loss": 17.5256, "step": 17659 }, { "epoch": 0.32281060924561755, "grad_norm": 6.824640501856777, "learning_rate": 7.915374983027593e-06, "loss": 17.8469, "step": 17660 }, { "epoch": 0.3228288884420641, "grad_norm": 6.830626955606592, "learning_rate": 7.915134492718323e-06, "loss": 17.6978, "step": 17661 }, { "epoch": 0.3228471676385106, "grad_norm": 7.148219238642276, "learning_rate": 7.914893992191759e-06, "loss": 17.8267, "step": 17662 }, { "epoch": 0.32286544683495716, "grad_norm": 7.88725752829819, "learning_rate": 7.914653481448742e-06, "loss": 18.2131, "step": 17663 }, { "epoch": 0.32288372603140364, "grad_norm": 6.4780235233255095, "learning_rate": 7.914412960490118e-06, "loss": 17.4028, "step": 17664 }, { "epoch": 0.3229020052278502, "grad_norm": 5.945918683748106, "learning_rate": 7.914172429316733e-06, "loss": 17.4281, "step": 17665 }, { "epoch": 0.3229202844242967, "grad_norm": 6.167771083235213, "learning_rate": 7.913931887929423e-06, "loss": 17.312, "step": 17666 }, { "epoch": 0.32293856362074325, "grad_norm": 8.232554697348833, "learning_rate": 7.913691336329037e-06, "loss": 18.3315, "step": 17667 }, { "epoch": 0.3229568428171898, "grad_norm": 6.99789188139659, "learning_rate": 7.913450774516415e-06, "loss": 17.6496, "step": 17668 }, { "epoch": 0.32297512201363626, "grad_norm": 6.172405494613258, "learning_rate": 7.9132102024924e-06, "loss": 17.3793, "step": 17669 }, { "epoch": 0.3229934012100828, "grad_norm": 6.875727755040785, "learning_rate": 7.912969620257835e-06, "loss": 17.3903, "step": 17670 }, { "epoch": 0.32301168040652933, "grad_norm": 8.725162881721227, "learning_rate": 7.912729027813568e-06, "loss": 18.6348, "step": 17671 }, { "epoch": 0.32302995960297587, "grad_norm": 5.532490516428547, "learning_rate": 7.912488425160436e-06, "loss": 17.2522, "step": 17672 }, { "epoch": 0.3230482387994224, "grad_norm": 5.84360916027288, "learning_rate": 7.912247812299283e-06, "loss": 17.2939, "step": 17673 }, { "epoch": 0.3230665179958689, "grad_norm": 8.111220523515845, "learning_rate": 7.912007189230957e-06, "loss": 18.5528, "step": 17674 }, { "epoch": 0.3230847971923154, "grad_norm": 7.7266110967014425, "learning_rate": 7.911766555956297e-06, "loss": 18.3292, "step": 17675 }, { "epoch": 0.32310307638876196, "grad_norm": 6.113899516838553, "learning_rate": 7.91152591247615e-06, "loss": 17.3183, "step": 17676 }, { "epoch": 0.3231213555852085, "grad_norm": 4.764840947501727, "learning_rate": 7.911285258791355e-06, "loss": 17.0315, "step": 17677 }, { "epoch": 0.32313963478165497, "grad_norm": 5.930207843666761, "learning_rate": 7.91104459490276e-06, "loss": 17.4221, "step": 17678 }, { "epoch": 0.3231579139781015, "grad_norm": 6.575485128174704, "learning_rate": 7.910803920811203e-06, "loss": 17.528, "step": 17679 }, { "epoch": 0.32317619317454804, "grad_norm": 7.029879562926375, "learning_rate": 7.910563236517534e-06, "loss": 17.6705, "step": 17680 }, { "epoch": 0.3231944723709946, "grad_norm": 7.354106194436128, "learning_rate": 7.910322542022591e-06, "loss": 17.7459, "step": 17681 }, { "epoch": 0.3232127515674411, "grad_norm": 6.873088354184548, "learning_rate": 7.91008183732722e-06, "loss": 17.7918, "step": 17682 }, { "epoch": 0.3232310307638876, "grad_norm": 6.241485631431879, "learning_rate": 7.909841122432269e-06, "loss": 17.7582, "step": 17683 }, { "epoch": 0.32324930996033413, "grad_norm": 6.642653081716574, "learning_rate": 7.909600397338573e-06, "loss": 17.5774, "step": 17684 }, { "epoch": 0.32326758915678067, "grad_norm": 7.312009719374648, "learning_rate": 7.909359662046983e-06, "loss": 17.7914, "step": 17685 }, { "epoch": 0.3232858683532272, "grad_norm": 7.6530235791674, "learning_rate": 7.909118916558338e-06, "loss": 18.1013, "step": 17686 }, { "epoch": 0.32330414754967374, "grad_norm": 5.896039758083264, "learning_rate": 7.908878160873483e-06, "loss": 17.2169, "step": 17687 }, { "epoch": 0.3233224267461202, "grad_norm": 7.229472362374189, "learning_rate": 7.908637394993265e-06, "loss": 17.7046, "step": 17688 }, { "epoch": 0.32334070594256675, "grad_norm": 6.181143582624684, "learning_rate": 7.90839661891852e-06, "loss": 17.3552, "step": 17689 }, { "epoch": 0.3233589851390133, "grad_norm": 6.751168869331026, "learning_rate": 7.908155832650103e-06, "loss": 17.6183, "step": 17690 }, { "epoch": 0.3233772643354598, "grad_norm": 5.3818619076095136, "learning_rate": 7.90791503618885e-06, "loss": 17.0947, "step": 17691 }, { "epoch": 0.32339554353190636, "grad_norm": 7.173813864648602, "learning_rate": 7.907674229535606e-06, "loss": 17.7175, "step": 17692 }, { "epoch": 0.32341382272835284, "grad_norm": 6.849343874448063, "learning_rate": 7.907433412691218e-06, "loss": 17.6358, "step": 17693 }, { "epoch": 0.3234321019247994, "grad_norm": 5.811953914809876, "learning_rate": 7.907192585656528e-06, "loss": 17.3599, "step": 17694 }, { "epoch": 0.3234503811212459, "grad_norm": 6.406612554200852, "learning_rate": 7.90695174843238e-06, "loss": 17.3569, "step": 17695 }, { "epoch": 0.32346866031769245, "grad_norm": 5.3506577758741125, "learning_rate": 7.906710901019618e-06, "loss": 17.1093, "step": 17696 }, { "epoch": 0.323486939514139, "grad_norm": 7.877884365220087, "learning_rate": 7.906470043419086e-06, "loss": 18.4213, "step": 17697 }, { "epoch": 0.32350521871058546, "grad_norm": 6.736546281178132, "learning_rate": 7.90622917563163e-06, "loss": 17.8381, "step": 17698 }, { "epoch": 0.323523497907032, "grad_norm": 7.589485775589006, "learning_rate": 7.905988297658093e-06, "loss": 18.1189, "step": 17699 }, { "epoch": 0.32354177710347853, "grad_norm": 5.904388765821344, "learning_rate": 7.905747409499318e-06, "loss": 17.4213, "step": 17700 }, { "epoch": 0.32356005629992507, "grad_norm": 7.315703403965148, "learning_rate": 7.905506511156151e-06, "loss": 17.8598, "step": 17701 }, { "epoch": 0.3235783354963716, "grad_norm": 7.147717361011801, "learning_rate": 7.905265602629435e-06, "loss": 18.0147, "step": 17702 }, { "epoch": 0.3235966146928181, "grad_norm": 8.356688813804935, "learning_rate": 7.905024683920018e-06, "loss": 18.8452, "step": 17703 }, { "epoch": 0.3236148938892646, "grad_norm": 5.914228540617025, "learning_rate": 7.904783755028738e-06, "loss": 17.2188, "step": 17704 }, { "epoch": 0.32363317308571116, "grad_norm": 5.47241798190702, "learning_rate": 7.904542815956444e-06, "loss": 17.2184, "step": 17705 }, { "epoch": 0.3236514522821577, "grad_norm": 5.4748724835386104, "learning_rate": 7.90430186670398e-06, "loss": 17.1407, "step": 17706 }, { "epoch": 0.32366973147860423, "grad_norm": 7.256228296101664, "learning_rate": 7.90406090727219e-06, "loss": 17.7795, "step": 17707 }, { "epoch": 0.3236880106750507, "grad_norm": 7.323442326843513, "learning_rate": 7.90381993766192e-06, "loss": 18.3803, "step": 17708 }, { "epoch": 0.32370628987149724, "grad_norm": 6.627845023069605, "learning_rate": 7.903578957874012e-06, "loss": 17.4024, "step": 17709 }, { "epoch": 0.3237245690679438, "grad_norm": 7.420892493312514, "learning_rate": 7.90333796790931e-06, "loss": 18.1122, "step": 17710 }, { "epoch": 0.3237428482643903, "grad_norm": 7.30268088293728, "learning_rate": 7.903096967768662e-06, "loss": 18.3343, "step": 17711 }, { "epoch": 0.3237611274608368, "grad_norm": 6.287198503197101, "learning_rate": 7.902855957452911e-06, "loss": 17.4339, "step": 17712 }, { "epoch": 0.32377940665728333, "grad_norm": 6.465856027196392, "learning_rate": 7.902614936962902e-06, "loss": 17.6532, "step": 17713 }, { "epoch": 0.32379768585372987, "grad_norm": 6.308087695928773, "learning_rate": 7.902373906299479e-06, "loss": 17.4322, "step": 17714 }, { "epoch": 0.3238159650501764, "grad_norm": 6.605016306857635, "learning_rate": 7.902132865463487e-06, "loss": 17.5501, "step": 17715 }, { "epoch": 0.32383424424662294, "grad_norm": 5.748431837972686, "learning_rate": 7.901891814455772e-06, "loss": 17.503, "step": 17716 }, { "epoch": 0.3238525234430694, "grad_norm": 7.569285201614199, "learning_rate": 7.901650753277177e-06, "loss": 18.0239, "step": 17717 }, { "epoch": 0.32387080263951595, "grad_norm": 8.720107472605482, "learning_rate": 7.901409681928548e-06, "loss": 18.1893, "step": 17718 }, { "epoch": 0.3238890818359625, "grad_norm": 6.799628458198373, "learning_rate": 7.90116860041073e-06, "loss": 18.034, "step": 17719 }, { "epoch": 0.323907361032409, "grad_norm": 7.660477877385908, "learning_rate": 7.90092750872457e-06, "loss": 18.111, "step": 17720 }, { "epoch": 0.32392564022885556, "grad_norm": 6.272439255187087, "learning_rate": 7.900686406870908e-06, "loss": 17.3672, "step": 17721 }, { "epoch": 0.32394391942530204, "grad_norm": 6.880732583845493, "learning_rate": 7.900445294850591e-06, "loss": 17.4492, "step": 17722 }, { "epoch": 0.3239621986217486, "grad_norm": 6.76794043338143, "learning_rate": 7.900204172664468e-06, "loss": 17.4717, "step": 17723 }, { "epoch": 0.3239804778181951, "grad_norm": 6.342803464392752, "learning_rate": 7.89996304031338e-06, "loss": 17.3245, "step": 17724 }, { "epoch": 0.32399875701464165, "grad_norm": 8.656431088842632, "learning_rate": 7.899721897798172e-06, "loss": 18.5138, "step": 17725 }, { "epoch": 0.3240170362110882, "grad_norm": 6.08280644000809, "learning_rate": 7.899480745119693e-06, "loss": 17.5171, "step": 17726 }, { "epoch": 0.32403531540753466, "grad_norm": 6.808667970082966, "learning_rate": 7.899239582278783e-06, "loss": 17.5517, "step": 17727 }, { "epoch": 0.3240535946039812, "grad_norm": 6.885294328924785, "learning_rate": 7.898998409276291e-06, "loss": 17.6877, "step": 17728 }, { "epoch": 0.32407187380042773, "grad_norm": 6.599931734145278, "learning_rate": 7.89875722611306e-06, "loss": 17.6172, "step": 17729 }, { "epoch": 0.32409015299687427, "grad_norm": 5.370720150179787, "learning_rate": 7.898516032789937e-06, "loss": 17.0975, "step": 17730 }, { "epoch": 0.3241084321933208, "grad_norm": 5.709689429617202, "learning_rate": 7.898274829307769e-06, "loss": 17.2393, "step": 17731 }, { "epoch": 0.3241267113897673, "grad_norm": 6.0834948297296645, "learning_rate": 7.898033615667395e-06, "loss": 17.4599, "step": 17732 }, { "epoch": 0.3241449905862138, "grad_norm": 6.197853953703129, "learning_rate": 7.897792391869668e-06, "loss": 17.4502, "step": 17733 }, { "epoch": 0.32416326978266036, "grad_norm": 6.903159381393967, "learning_rate": 7.89755115791543e-06, "loss": 17.6691, "step": 17734 }, { "epoch": 0.3241815489791069, "grad_norm": 6.520885784867376, "learning_rate": 7.897309913805525e-06, "loss": 17.7851, "step": 17735 }, { "epoch": 0.32419982817555343, "grad_norm": 6.8901319836075805, "learning_rate": 7.8970686595408e-06, "loss": 17.6095, "step": 17736 }, { "epoch": 0.3242181073719999, "grad_norm": 6.546169171931813, "learning_rate": 7.896827395122102e-06, "loss": 17.2313, "step": 17737 }, { "epoch": 0.32423638656844644, "grad_norm": 7.915518776957184, "learning_rate": 7.896586120550276e-06, "loss": 17.9864, "step": 17738 }, { "epoch": 0.324254665764893, "grad_norm": 6.9312848548921435, "learning_rate": 7.896344835826166e-06, "loss": 17.8592, "step": 17739 }, { "epoch": 0.3242729449613395, "grad_norm": 5.485400336636975, "learning_rate": 7.89610354095062e-06, "loss": 17.3208, "step": 17740 }, { "epoch": 0.32429122415778605, "grad_norm": 7.078439224696487, "learning_rate": 7.895862235924481e-06, "loss": 18.073, "step": 17741 }, { "epoch": 0.32430950335423253, "grad_norm": 6.53713902367002, "learning_rate": 7.895620920748594e-06, "loss": 17.5156, "step": 17742 }, { "epoch": 0.32432778255067907, "grad_norm": 6.786474194080707, "learning_rate": 7.895379595423809e-06, "loss": 17.6559, "step": 17743 }, { "epoch": 0.3243460617471256, "grad_norm": 6.124202454779582, "learning_rate": 7.895138259950972e-06, "loss": 17.5843, "step": 17744 }, { "epoch": 0.32436434094357214, "grad_norm": 6.803613532007667, "learning_rate": 7.894896914330925e-06, "loss": 17.6096, "step": 17745 }, { "epoch": 0.3243826201400186, "grad_norm": 5.626643891123522, "learning_rate": 7.894655558564514e-06, "loss": 17.1485, "step": 17746 }, { "epoch": 0.32440089933646515, "grad_norm": 5.9320916818188785, "learning_rate": 7.894414192652589e-06, "loss": 17.1957, "step": 17747 }, { "epoch": 0.3244191785329117, "grad_norm": 7.825310368883438, "learning_rate": 7.89417281659599e-06, "loss": 18.514, "step": 17748 }, { "epoch": 0.3244374577293582, "grad_norm": 5.832333198813054, "learning_rate": 7.89393143039557e-06, "loss": 17.4828, "step": 17749 }, { "epoch": 0.32445573692580476, "grad_norm": 7.19840251586273, "learning_rate": 7.893690034052167e-06, "loss": 18.1829, "step": 17750 }, { "epoch": 0.32447401612225124, "grad_norm": 6.9550916640503875, "learning_rate": 7.893448627566637e-06, "loss": 17.4698, "step": 17751 }, { "epoch": 0.3244922953186978, "grad_norm": 6.196420070907171, "learning_rate": 7.893207210939817e-06, "loss": 17.5809, "step": 17752 }, { "epoch": 0.3245105745151443, "grad_norm": 6.916335868485267, "learning_rate": 7.892965784172558e-06, "loss": 17.642, "step": 17753 }, { "epoch": 0.32452885371159085, "grad_norm": 7.6881850398519616, "learning_rate": 7.892724347265706e-06, "loss": 18.3688, "step": 17754 }, { "epoch": 0.3245471329080374, "grad_norm": 6.320467909081312, "learning_rate": 7.892482900220105e-06, "loss": 17.5321, "step": 17755 }, { "epoch": 0.32456541210448386, "grad_norm": 5.970622163186657, "learning_rate": 7.892241443036601e-06, "loss": 17.6554, "step": 17756 }, { "epoch": 0.3245836913009304, "grad_norm": 6.518388630649442, "learning_rate": 7.891999975716043e-06, "loss": 17.7403, "step": 17757 }, { "epoch": 0.32460197049737693, "grad_norm": 8.581149135678286, "learning_rate": 7.891758498259277e-06, "loss": 18.1474, "step": 17758 }, { "epoch": 0.32462024969382347, "grad_norm": 6.430791470956686, "learning_rate": 7.891517010667147e-06, "loss": 17.4294, "step": 17759 }, { "epoch": 0.32463852889027, "grad_norm": 6.747363986693666, "learning_rate": 7.891275512940502e-06, "loss": 17.7416, "step": 17760 }, { "epoch": 0.3246568080867165, "grad_norm": 6.648622315508402, "learning_rate": 7.891034005080188e-06, "loss": 17.8707, "step": 17761 }, { "epoch": 0.324675087283163, "grad_norm": 7.107497351581862, "learning_rate": 7.890792487087049e-06, "loss": 17.6806, "step": 17762 }, { "epoch": 0.32469336647960956, "grad_norm": 6.519000431193044, "learning_rate": 7.890550958961933e-06, "loss": 17.4322, "step": 17763 }, { "epoch": 0.3247116456760561, "grad_norm": 6.180686488773445, "learning_rate": 7.890309420705686e-06, "loss": 17.4595, "step": 17764 }, { "epoch": 0.32472992487250263, "grad_norm": 6.813454925467863, "learning_rate": 7.890067872319158e-06, "loss": 17.3925, "step": 17765 }, { "epoch": 0.3247482040689491, "grad_norm": 7.281394317189339, "learning_rate": 7.88982631380319e-06, "loss": 17.4958, "step": 17766 }, { "epoch": 0.32476648326539564, "grad_norm": 6.22838325671392, "learning_rate": 7.889584745158634e-06, "loss": 17.5235, "step": 17767 }, { "epoch": 0.3247847624618422, "grad_norm": 7.7460781767421345, "learning_rate": 7.889343166386334e-06, "loss": 17.707, "step": 17768 }, { "epoch": 0.3248030416582887, "grad_norm": 6.367076966095275, "learning_rate": 7.889101577487134e-06, "loss": 17.6144, "step": 17769 }, { "epoch": 0.32482132085473525, "grad_norm": 6.420686888235853, "learning_rate": 7.888859978461887e-06, "loss": 17.7745, "step": 17770 }, { "epoch": 0.32483960005118173, "grad_norm": 6.926027327496974, "learning_rate": 7.888618369311436e-06, "loss": 17.5875, "step": 17771 }, { "epoch": 0.32485787924762827, "grad_norm": 6.695980001231234, "learning_rate": 7.888376750036626e-06, "loss": 17.4798, "step": 17772 }, { "epoch": 0.3248761584440748, "grad_norm": 6.401478752069769, "learning_rate": 7.888135120638309e-06, "loss": 17.4129, "step": 17773 }, { "epoch": 0.32489443764052134, "grad_norm": 6.690387221084487, "learning_rate": 7.887893481117327e-06, "loss": 17.6467, "step": 17774 }, { "epoch": 0.3249127168369679, "grad_norm": 6.288090431810262, "learning_rate": 7.88765183147453e-06, "loss": 17.5896, "step": 17775 }, { "epoch": 0.32493099603341435, "grad_norm": 6.856296727872927, "learning_rate": 7.887410171710764e-06, "loss": 17.6377, "step": 17776 }, { "epoch": 0.3249492752298609, "grad_norm": 7.220036562946701, "learning_rate": 7.887168501826874e-06, "loss": 18.1338, "step": 17777 }, { "epoch": 0.3249675544263074, "grad_norm": 6.856851184796004, "learning_rate": 7.88692682182371e-06, "loss": 17.7401, "step": 17778 }, { "epoch": 0.32498583362275396, "grad_norm": 8.166159479680575, "learning_rate": 7.886685131702118e-06, "loss": 18.38, "step": 17779 }, { "epoch": 0.32500411281920044, "grad_norm": 7.24517999511875, "learning_rate": 7.886443431462946e-06, "loss": 17.9074, "step": 17780 }, { "epoch": 0.325022392015647, "grad_norm": 7.729271856325883, "learning_rate": 7.886201721107041e-06, "loss": 18.1386, "step": 17781 }, { "epoch": 0.3250406712120935, "grad_norm": 6.002140416107779, "learning_rate": 7.885960000635247e-06, "loss": 17.3604, "step": 17782 }, { "epoch": 0.32505895040854005, "grad_norm": 6.484218390540682, "learning_rate": 7.885718270048414e-06, "loss": 17.3894, "step": 17783 }, { "epoch": 0.3250772296049866, "grad_norm": 6.5765930619465, "learning_rate": 7.885476529347391e-06, "loss": 17.8014, "step": 17784 }, { "epoch": 0.32509550880143306, "grad_norm": 7.923834403163036, "learning_rate": 7.885234778533022e-06, "loss": 18.1465, "step": 17785 }, { "epoch": 0.3251137879978796, "grad_norm": 6.430724149334308, "learning_rate": 7.884993017606155e-06, "loss": 17.4932, "step": 17786 }, { "epoch": 0.32513206719432614, "grad_norm": 6.77751500942763, "learning_rate": 7.884751246567637e-06, "loss": 18.0722, "step": 17787 }, { "epoch": 0.32515034639077267, "grad_norm": 6.186405548945096, "learning_rate": 7.884509465418318e-06, "loss": 17.6653, "step": 17788 }, { "epoch": 0.3251686255872192, "grad_norm": 7.268120331299511, "learning_rate": 7.884267674159043e-06, "loss": 17.9533, "step": 17789 }, { "epoch": 0.3251869047836657, "grad_norm": 7.245866146128734, "learning_rate": 7.884025872790661e-06, "loss": 18.1388, "step": 17790 }, { "epoch": 0.3252051839801122, "grad_norm": 6.500089085904076, "learning_rate": 7.883784061314017e-06, "loss": 17.6386, "step": 17791 }, { "epoch": 0.32522346317655876, "grad_norm": 6.76226009888217, "learning_rate": 7.88354223972996e-06, "loss": 17.5732, "step": 17792 }, { "epoch": 0.3252417423730053, "grad_norm": 5.529002913461683, "learning_rate": 7.883300408039338e-06, "loss": 17.024, "step": 17793 }, { "epoch": 0.32526002156945183, "grad_norm": 6.280103600881916, "learning_rate": 7.883058566243e-06, "loss": 17.3343, "step": 17794 }, { "epoch": 0.3252783007658983, "grad_norm": 5.537614534548059, "learning_rate": 7.88281671434179e-06, "loss": 17.201, "step": 17795 }, { "epoch": 0.32529657996234485, "grad_norm": 6.842566450357201, "learning_rate": 7.882574852336558e-06, "loss": 17.7165, "step": 17796 }, { "epoch": 0.3253148591587914, "grad_norm": 6.62388762569798, "learning_rate": 7.882332980228151e-06, "loss": 17.5779, "step": 17797 }, { "epoch": 0.3253331383552379, "grad_norm": 7.81704242010747, "learning_rate": 7.882091098017417e-06, "loss": 18.1819, "step": 17798 }, { "epoch": 0.32535141755168445, "grad_norm": 8.17155499574659, "learning_rate": 7.881849205705206e-06, "loss": 18.1628, "step": 17799 }, { "epoch": 0.32536969674813093, "grad_norm": 5.228344861810051, "learning_rate": 7.881607303292361e-06, "loss": 17.1929, "step": 17800 }, { "epoch": 0.32538797594457747, "grad_norm": 6.478130051775579, "learning_rate": 7.881365390779734e-06, "loss": 17.7408, "step": 17801 }, { "epoch": 0.325406255141024, "grad_norm": 7.205583581384945, "learning_rate": 7.881123468168169e-06, "loss": 17.7589, "step": 17802 }, { "epoch": 0.32542453433747054, "grad_norm": 8.659392100726214, "learning_rate": 7.880881535458519e-06, "loss": 18.608, "step": 17803 }, { "epoch": 0.3254428135339171, "grad_norm": 6.691558341576043, "learning_rate": 7.880639592651628e-06, "loss": 17.315, "step": 17804 }, { "epoch": 0.32546109273036355, "grad_norm": 6.824648938193877, "learning_rate": 7.880397639748346e-06, "loss": 17.7161, "step": 17805 }, { "epoch": 0.3254793719268101, "grad_norm": 5.681769825030666, "learning_rate": 7.88015567674952e-06, "loss": 17.378, "step": 17806 }, { "epoch": 0.3254976511232566, "grad_norm": 7.590131225088655, "learning_rate": 7.879913703655997e-06, "loss": 17.7448, "step": 17807 }, { "epoch": 0.32551593031970316, "grad_norm": 6.873404346881098, "learning_rate": 7.879671720468626e-06, "loss": 17.6415, "step": 17808 }, { "epoch": 0.3255342095161497, "grad_norm": 8.40510328880946, "learning_rate": 7.879429727188257e-06, "loss": 17.7219, "step": 17809 }, { "epoch": 0.3255524887125962, "grad_norm": 6.477972532371386, "learning_rate": 7.879187723815737e-06, "loss": 17.7117, "step": 17810 }, { "epoch": 0.3255707679090427, "grad_norm": 7.337673120293114, "learning_rate": 7.878945710351913e-06, "loss": 17.9361, "step": 17811 }, { "epoch": 0.32558904710548925, "grad_norm": 6.719375226765658, "learning_rate": 7.878703686797634e-06, "loss": 17.4639, "step": 17812 }, { "epoch": 0.3256073263019358, "grad_norm": 5.282211402681556, "learning_rate": 7.878461653153749e-06, "loss": 17.2107, "step": 17813 }, { "epoch": 0.32562560549838226, "grad_norm": 6.322777752947682, "learning_rate": 7.878219609421105e-06, "loss": 17.5931, "step": 17814 }, { "epoch": 0.3256438846948288, "grad_norm": 6.335835715646217, "learning_rate": 7.87797755560055e-06, "loss": 17.7482, "step": 17815 }, { "epoch": 0.32566216389127534, "grad_norm": 6.695294309727545, "learning_rate": 7.877735491692937e-06, "loss": 17.9087, "step": 17816 }, { "epoch": 0.32568044308772187, "grad_norm": 6.898742884933421, "learning_rate": 7.877493417699109e-06, "loss": 17.8053, "step": 17817 }, { "epoch": 0.3256987222841684, "grad_norm": 5.513199510821228, "learning_rate": 7.877251333619916e-06, "loss": 17.1758, "step": 17818 }, { "epoch": 0.3257170014806149, "grad_norm": 5.876307084156893, "learning_rate": 7.877009239456206e-06, "loss": 17.3925, "step": 17819 }, { "epoch": 0.3257352806770614, "grad_norm": 7.402268251489208, "learning_rate": 7.876767135208829e-06, "loss": 17.6846, "step": 17820 }, { "epoch": 0.32575355987350796, "grad_norm": 6.426810501598508, "learning_rate": 7.876525020878632e-06, "loss": 17.7161, "step": 17821 }, { "epoch": 0.3257718390699545, "grad_norm": 6.662116041025357, "learning_rate": 7.876282896466465e-06, "loss": 17.7204, "step": 17822 }, { "epoch": 0.32579011826640103, "grad_norm": 5.774182478325658, "learning_rate": 7.876040761973179e-06, "loss": 17.3396, "step": 17823 }, { "epoch": 0.3258083974628475, "grad_norm": 6.616518385284222, "learning_rate": 7.875798617399614e-06, "loss": 17.5979, "step": 17824 }, { "epoch": 0.32582667665929405, "grad_norm": 6.846705875031634, "learning_rate": 7.875556462746628e-06, "loss": 17.9477, "step": 17825 }, { "epoch": 0.3258449558557406, "grad_norm": 5.693870640415677, "learning_rate": 7.875314298015065e-06, "loss": 17.2138, "step": 17826 }, { "epoch": 0.3258632350521871, "grad_norm": 6.969038422382312, "learning_rate": 7.875072123205776e-06, "loss": 18.0013, "step": 17827 }, { "epoch": 0.32588151424863365, "grad_norm": 6.1486335454418874, "learning_rate": 7.874829938319608e-06, "loss": 17.4233, "step": 17828 }, { "epoch": 0.32589979344508013, "grad_norm": 6.132878567881788, "learning_rate": 7.87458774335741e-06, "loss": 17.4989, "step": 17829 }, { "epoch": 0.32591807264152667, "grad_norm": 6.537418428041828, "learning_rate": 7.874345538320033e-06, "loss": 17.5308, "step": 17830 }, { "epoch": 0.3259363518379732, "grad_norm": 7.185045793392967, "learning_rate": 7.874103323208323e-06, "loss": 17.5923, "step": 17831 }, { "epoch": 0.32595463103441974, "grad_norm": 7.792166666071645, "learning_rate": 7.87386109802313e-06, "loss": 18.4338, "step": 17832 }, { "epoch": 0.3259729102308663, "grad_norm": 5.88940091915513, "learning_rate": 7.873618862765305e-06, "loss": 17.4111, "step": 17833 }, { "epoch": 0.32599118942731276, "grad_norm": 5.0957149338845635, "learning_rate": 7.873376617435693e-06, "loss": 17.0923, "step": 17834 }, { "epoch": 0.3260094686237593, "grad_norm": 6.947892487435096, "learning_rate": 7.873134362035147e-06, "loss": 17.7021, "step": 17835 }, { "epoch": 0.3260277478202058, "grad_norm": 6.767897076797359, "learning_rate": 7.872892096564512e-06, "loss": 17.6243, "step": 17836 }, { "epoch": 0.32604602701665236, "grad_norm": 5.5962489430574625, "learning_rate": 7.872649821024642e-06, "loss": 17.0655, "step": 17837 }, { "epoch": 0.3260643062130989, "grad_norm": 7.541190100036091, "learning_rate": 7.872407535416384e-06, "loss": 18.0704, "step": 17838 }, { "epoch": 0.3260825854095454, "grad_norm": 7.35367908788049, "learning_rate": 7.872165239740585e-06, "loss": 18.224, "step": 17839 }, { "epoch": 0.3261008646059919, "grad_norm": 6.867253190417915, "learning_rate": 7.871922933998098e-06, "loss": 17.7978, "step": 17840 }, { "epoch": 0.32611914380243845, "grad_norm": 7.990110756551321, "learning_rate": 7.871680618189768e-06, "loss": 17.9894, "step": 17841 }, { "epoch": 0.326137422998885, "grad_norm": 5.920388531886041, "learning_rate": 7.871438292316448e-06, "loss": 17.292, "step": 17842 }, { "epoch": 0.3261557021953315, "grad_norm": 5.815565367184524, "learning_rate": 7.871195956378985e-06, "loss": 17.4709, "step": 17843 }, { "epoch": 0.326173981391778, "grad_norm": 5.669661110095847, "learning_rate": 7.870953610378231e-06, "loss": 17.2476, "step": 17844 }, { "epoch": 0.32619226058822454, "grad_norm": 6.000059489859444, "learning_rate": 7.870711254315031e-06, "loss": 17.153, "step": 17845 }, { "epoch": 0.32621053978467107, "grad_norm": 6.574262859048824, "learning_rate": 7.870468888190239e-06, "loss": 17.281, "step": 17846 }, { "epoch": 0.3262288189811176, "grad_norm": 6.777443581943335, "learning_rate": 7.870226512004704e-06, "loss": 17.566, "step": 17847 }, { "epoch": 0.3262470981775641, "grad_norm": 7.712616665092374, "learning_rate": 7.869984125759272e-06, "loss": 18.2918, "step": 17848 }, { "epoch": 0.3262653773740106, "grad_norm": 7.503483333333809, "learning_rate": 7.869741729454797e-06, "loss": 17.9509, "step": 17849 }, { "epoch": 0.32628365657045716, "grad_norm": 6.6588036093839165, "learning_rate": 7.869499323092122e-06, "loss": 17.5324, "step": 17850 }, { "epoch": 0.3263019357669037, "grad_norm": 6.430826262770365, "learning_rate": 7.869256906672104e-06, "loss": 17.451, "step": 17851 }, { "epoch": 0.32632021496335023, "grad_norm": 7.02650368230658, "learning_rate": 7.869014480195589e-06, "loss": 17.9679, "step": 17852 }, { "epoch": 0.3263384941597967, "grad_norm": 6.164166018539878, "learning_rate": 7.868772043663429e-06, "loss": 17.3597, "step": 17853 }, { "epoch": 0.32635677335624325, "grad_norm": 6.413715747359366, "learning_rate": 7.868529597076469e-06, "loss": 17.8651, "step": 17854 }, { "epoch": 0.3263750525526898, "grad_norm": 6.470010343951983, "learning_rate": 7.868287140435564e-06, "loss": 17.5322, "step": 17855 }, { "epoch": 0.3263933317491363, "grad_norm": 6.276310738559672, "learning_rate": 7.86804467374156e-06, "loss": 17.6477, "step": 17856 }, { "epoch": 0.32641161094558285, "grad_norm": 6.24266804974688, "learning_rate": 7.867802196995308e-06, "loss": 17.4857, "step": 17857 }, { "epoch": 0.32642989014202933, "grad_norm": 6.496002462917033, "learning_rate": 7.867559710197658e-06, "loss": 17.4417, "step": 17858 }, { "epoch": 0.32644816933847587, "grad_norm": 7.161516651354113, "learning_rate": 7.867317213349461e-06, "loss": 18.0931, "step": 17859 }, { "epoch": 0.3264664485349224, "grad_norm": 6.511777079840866, "learning_rate": 7.867074706451567e-06, "loss": 17.4908, "step": 17860 }, { "epoch": 0.32648472773136894, "grad_norm": 7.188666024752628, "learning_rate": 7.866832189504823e-06, "loss": 17.5326, "step": 17861 }, { "epoch": 0.3265030069278155, "grad_norm": 7.008425777054819, "learning_rate": 7.866589662510083e-06, "loss": 17.9847, "step": 17862 }, { "epoch": 0.32652128612426196, "grad_norm": 7.068713305265701, "learning_rate": 7.866347125468192e-06, "loss": 17.8885, "step": 17863 }, { "epoch": 0.3265395653207085, "grad_norm": 5.917749223184772, "learning_rate": 7.866104578380005e-06, "loss": 17.7064, "step": 17864 }, { "epoch": 0.326557844517155, "grad_norm": 6.621049855079859, "learning_rate": 7.86586202124637e-06, "loss": 17.6552, "step": 17865 }, { "epoch": 0.32657612371360156, "grad_norm": 7.758840166314253, "learning_rate": 7.865619454068137e-06, "loss": 17.9226, "step": 17866 }, { "epoch": 0.3265944029100481, "grad_norm": 7.331408698946266, "learning_rate": 7.865376876846158e-06, "loss": 17.9984, "step": 17867 }, { "epoch": 0.3266126821064946, "grad_norm": 6.837829300824041, "learning_rate": 7.86513428958128e-06, "loss": 17.4893, "step": 17868 }, { "epoch": 0.3266309613029411, "grad_norm": 6.4049637784235545, "learning_rate": 7.864891692274355e-06, "loss": 17.5133, "step": 17869 }, { "epoch": 0.32664924049938765, "grad_norm": 6.273073302128767, "learning_rate": 7.864649084926232e-06, "loss": 17.2967, "step": 17870 }, { "epoch": 0.3266675196958342, "grad_norm": 6.344309302924921, "learning_rate": 7.864406467537764e-06, "loss": 17.439, "step": 17871 }, { "epoch": 0.3266857988922807, "grad_norm": 6.449355080338526, "learning_rate": 7.864163840109802e-06, "loss": 17.1538, "step": 17872 }, { "epoch": 0.3267040780887272, "grad_norm": 5.920602117673282, "learning_rate": 7.863921202643192e-06, "loss": 17.258, "step": 17873 }, { "epoch": 0.32672235728517374, "grad_norm": 6.259629210157948, "learning_rate": 7.863678555138786e-06, "loss": 17.3877, "step": 17874 }, { "epoch": 0.3267406364816203, "grad_norm": 6.69678860967342, "learning_rate": 7.863435897597437e-06, "loss": 17.802, "step": 17875 }, { "epoch": 0.3267589156780668, "grad_norm": 6.340828147922386, "learning_rate": 7.863193230019991e-06, "loss": 17.6317, "step": 17876 }, { "epoch": 0.32677719487451334, "grad_norm": 6.991458089088453, "learning_rate": 7.862950552407304e-06, "loss": 18.0541, "step": 17877 }, { "epoch": 0.3267954740709598, "grad_norm": 6.0945809284360495, "learning_rate": 7.862707864760225e-06, "loss": 17.2963, "step": 17878 }, { "epoch": 0.32681375326740636, "grad_norm": 7.682440158745301, "learning_rate": 7.862465167079599e-06, "loss": 17.6765, "step": 17879 }, { "epoch": 0.3268320324638529, "grad_norm": 6.554405377487383, "learning_rate": 7.862222459366283e-06, "loss": 17.4388, "step": 17880 }, { "epoch": 0.32685031166029943, "grad_norm": 5.860101075469898, "learning_rate": 7.861979741621126e-06, "loss": 17.3062, "step": 17881 }, { "epoch": 0.3268685908567459, "grad_norm": 5.921788599011734, "learning_rate": 7.86173701384498e-06, "loss": 17.421, "step": 17882 }, { "epoch": 0.32688687005319245, "grad_norm": 6.729473933777559, "learning_rate": 7.86149427603869e-06, "loss": 18.0369, "step": 17883 }, { "epoch": 0.326905149249639, "grad_norm": 7.171624055665262, "learning_rate": 7.861251528203113e-06, "loss": 17.7876, "step": 17884 }, { "epoch": 0.3269234284460855, "grad_norm": 5.78811924291365, "learning_rate": 7.861008770339098e-06, "loss": 17.2695, "step": 17885 }, { "epoch": 0.32694170764253205, "grad_norm": 7.386439276230386, "learning_rate": 7.860766002447495e-06, "loss": 17.7566, "step": 17886 }, { "epoch": 0.32695998683897853, "grad_norm": 5.864195043080574, "learning_rate": 7.860523224529156e-06, "loss": 17.34, "step": 17887 }, { "epoch": 0.32697826603542507, "grad_norm": 5.371148963013059, "learning_rate": 7.86028043658493e-06, "loss": 17.0583, "step": 17888 }, { "epoch": 0.3269965452318716, "grad_norm": 7.595197440764273, "learning_rate": 7.860037638615671e-06, "loss": 17.8264, "step": 17889 }, { "epoch": 0.32701482442831814, "grad_norm": 5.525909084266543, "learning_rate": 7.859794830622227e-06, "loss": 17.1311, "step": 17890 }, { "epoch": 0.3270331036247647, "grad_norm": 7.350189973649129, "learning_rate": 7.859552012605452e-06, "loss": 18.1613, "step": 17891 }, { "epoch": 0.32705138282121116, "grad_norm": 6.313772679875273, "learning_rate": 7.859309184566193e-06, "loss": 17.7864, "step": 17892 }, { "epoch": 0.3270696620176577, "grad_norm": 5.807894059462818, "learning_rate": 7.859066346505305e-06, "loss": 17.3594, "step": 17893 }, { "epoch": 0.3270879412141042, "grad_norm": 5.577269715782767, "learning_rate": 7.858823498423637e-06, "loss": 17.5178, "step": 17894 }, { "epoch": 0.32710622041055076, "grad_norm": 5.881722427483263, "learning_rate": 7.85858064032204e-06, "loss": 17.5096, "step": 17895 }, { "epoch": 0.3271244996069973, "grad_norm": 7.551792391781151, "learning_rate": 7.858337772201368e-06, "loss": 18.3451, "step": 17896 }, { "epoch": 0.3271427788034438, "grad_norm": 7.199569481845562, "learning_rate": 7.858094894062468e-06, "loss": 17.7249, "step": 17897 }, { "epoch": 0.3271610579998903, "grad_norm": 6.221756197581803, "learning_rate": 7.857852005906195e-06, "loss": 17.5991, "step": 17898 }, { "epoch": 0.32717933719633685, "grad_norm": 7.165307209689358, "learning_rate": 7.857609107733398e-06, "loss": 17.9371, "step": 17899 }, { "epoch": 0.3271976163927834, "grad_norm": 6.4143827619662614, "learning_rate": 7.857366199544929e-06, "loss": 17.3668, "step": 17900 }, { "epoch": 0.3272158955892299, "grad_norm": 6.634976745565556, "learning_rate": 7.857123281341639e-06, "loss": 17.635, "step": 17901 }, { "epoch": 0.3272341747856764, "grad_norm": 6.445272136131572, "learning_rate": 7.85688035312438e-06, "loss": 17.7784, "step": 17902 }, { "epoch": 0.32725245398212294, "grad_norm": 6.627760389550447, "learning_rate": 7.856637414894003e-06, "loss": 17.7706, "step": 17903 }, { "epoch": 0.3272707331785695, "grad_norm": 7.047530424947791, "learning_rate": 7.85639446665136e-06, "loss": 17.7615, "step": 17904 }, { "epoch": 0.327289012375016, "grad_norm": 6.242491228600791, "learning_rate": 7.856151508397303e-06, "loss": 17.2843, "step": 17905 }, { "epoch": 0.32730729157146254, "grad_norm": 7.56601766773708, "learning_rate": 7.855908540132682e-06, "loss": 18.0979, "step": 17906 }, { "epoch": 0.327325570767909, "grad_norm": 6.057493735827574, "learning_rate": 7.85566556185835e-06, "loss": 17.554, "step": 17907 }, { "epoch": 0.32734384996435556, "grad_norm": 6.36774798328385, "learning_rate": 7.855422573575158e-06, "loss": 17.4506, "step": 17908 }, { "epoch": 0.3273621291608021, "grad_norm": 5.150073712766874, "learning_rate": 7.855179575283958e-06, "loss": 16.9747, "step": 17909 }, { "epoch": 0.32738040835724863, "grad_norm": 6.624202698198208, "learning_rate": 7.8549365669856e-06, "loss": 17.5623, "step": 17910 }, { "epoch": 0.32739868755369517, "grad_norm": 6.897513006872821, "learning_rate": 7.854693548680939e-06, "loss": 17.84, "step": 17911 }, { "epoch": 0.32741696675014165, "grad_norm": 6.807505608479961, "learning_rate": 7.854450520370823e-06, "loss": 17.7713, "step": 17912 }, { "epoch": 0.3274352459465882, "grad_norm": 8.995518507179327, "learning_rate": 7.854207482056106e-06, "loss": 18.9268, "step": 17913 }, { "epoch": 0.3274535251430347, "grad_norm": 6.933558453854653, "learning_rate": 7.85396443373764e-06, "loss": 17.8008, "step": 17914 }, { "epoch": 0.32747180433948125, "grad_norm": 6.034555834357745, "learning_rate": 7.853721375416276e-06, "loss": 17.4894, "step": 17915 }, { "epoch": 0.32749008353592773, "grad_norm": 6.874334781188533, "learning_rate": 7.853478307092867e-06, "loss": 17.7075, "step": 17916 }, { "epoch": 0.32750836273237427, "grad_norm": 6.582386838434732, "learning_rate": 7.853235228768263e-06, "loss": 17.7948, "step": 17917 }, { "epoch": 0.3275266419288208, "grad_norm": 5.708070149466229, "learning_rate": 7.852992140443317e-06, "loss": 17.3767, "step": 17918 }, { "epoch": 0.32754492112526734, "grad_norm": 5.705497787883412, "learning_rate": 7.852749042118882e-06, "loss": 17.1096, "step": 17919 }, { "epoch": 0.3275632003217139, "grad_norm": 6.898629192703969, "learning_rate": 7.85250593379581e-06, "loss": 17.7545, "step": 17920 }, { "epoch": 0.32758147951816036, "grad_norm": 5.666294156284441, "learning_rate": 7.85226281547495e-06, "loss": 17.4826, "step": 17921 }, { "epoch": 0.3275997587146069, "grad_norm": 5.759539364835163, "learning_rate": 7.852019687157158e-06, "loss": 17.1894, "step": 17922 }, { "epoch": 0.32761803791105343, "grad_norm": 6.0697805940568275, "learning_rate": 7.851776548843285e-06, "loss": 17.3675, "step": 17923 }, { "epoch": 0.32763631710749996, "grad_norm": 6.1392823783558494, "learning_rate": 7.851533400534179e-06, "loss": 17.5242, "step": 17924 }, { "epoch": 0.3276545963039465, "grad_norm": 8.411186952250112, "learning_rate": 7.8512902422307e-06, "loss": 18.1547, "step": 17925 }, { "epoch": 0.327672875500393, "grad_norm": 8.022695074463817, "learning_rate": 7.851047073933693e-06, "loss": 17.9314, "step": 17926 }, { "epoch": 0.3276911546968395, "grad_norm": 5.887575318032095, "learning_rate": 7.850803895644017e-06, "loss": 17.4752, "step": 17927 }, { "epoch": 0.32770943389328605, "grad_norm": 7.175985913344132, "learning_rate": 7.850560707362518e-06, "loss": 17.9756, "step": 17928 }, { "epoch": 0.3277277130897326, "grad_norm": 6.701434753654602, "learning_rate": 7.85031750909005e-06, "loss": 17.622, "step": 17929 }, { "epoch": 0.3277459922861791, "grad_norm": 5.9431661773785684, "learning_rate": 7.85007430082747e-06, "loss": 17.4444, "step": 17930 }, { "epoch": 0.3277642714826256, "grad_norm": 5.460614712466154, "learning_rate": 7.849831082575625e-06, "loss": 17.2491, "step": 17931 }, { "epoch": 0.32778255067907214, "grad_norm": 6.449065531617945, "learning_rate": 7.849587854335369e-06, "loss": 17.6549, "step": 17932 }, { "epoch": 0.3278008298755187, "grad_norm": 6.505054635099538, "learning_rate": 7.849344616107554e-06, "loss": 17.4446, "step": 17933 }, { "epoch": 0.3278191090719652, "grad_norm": 6.711412197228129, "learning_rate": 7.849101367893037e-06, "loss": 17.407, "step": 17934 }, { "epoch": 0.32783738826841174, "grad_norm": 7.0024387822301275, "learning_rate": 7.848858109692663e-06, "loss": 17.6073, "step": 17935 }, { "epoch": 0.3278556674648582, "grad_norm": 5.9135186739116845, "learning_rate": 7.84861484150729e-06, "loss": 17.4377, "step": 17936 }, { "epoch": 0.32787394666130476, "grad_norm": 7.451437561929218, "learning_rate": 7.848371563337771e-06, "loss": 17.9172, "step": 17937 }, { "epoch": 0.3278922258577513, "grad_norm": 7.453435976355388, "learning_rate": 7.848128275184954e-06, "loss": 17.9466, "step": 17938 }, { "epoch": 0.32791050505419783, "grad_norm": 6.787862427824031, "learning_rate": 7.847884977049695e-06, "loss": 17.8055, "step": 17939 }, { "epoch": 0.32792878425064437, "grad_norm": 5.684888889559415, "learning_rate": 7.847641668932848e-06, "loss": 17.0676, "step": 17940 }, { "epoch": 0.32794706344709085, "grad_norm": 6.670039474136435, "learning_rate": 7.847398350835263e-06, "loss": 17.7334, "step": 17941 }, { "epoch": 0.3279653426435374, "grad_norm": 6.450659337918838, "learning_rate": 7.847155022757793e-06, "loss": 17.2774, "step": 17942 }, { "epoch": 0.3279836218399839, "grad_norm": 7.099834262384581, "learning_rate": 7.846911684701293e-06, "loss": 18.0099, "step": 17943 }, { "epoch": 0.32800190103643045, "grad_norm": 8.16204715184103, "learning_rate": 7.846668336666616e-06, "loss": 17.9374, "step": 17944 }, { "epoch": 0.328020180232877, "grad_norm": 5.679050610946927, "learning_rate": 7.84642497865461e-06, "loss": 17.0642, "step": 17945 }, { "epoch": 0.32803845942932347, "grad_norm": 6.36842780726683, "learning_rate": 7.846181610666134e-06, "loss": 17.358, "step": 17946 }, { "epoch": 0.32805673862577, "grad_norm": 7.540779931147121, "learning_rate": 7.845938232702037e-06, "loss": 18.0322, "step": 17947 }, { "epoch": 0.32807501782221654, "grad_norm": 6.828821345241826, "learning_rate": 7.845694844763174e-06, "loss": 17.5905, "step": 17948 }, { "epoch": 0.3280932970186631, "grad_norm": 6.156397289374125, "learning_rate": 7.845451446850399e-06, "loss": 17.3741, "step": 17949 }, { "epoch": 0.32811157621510956, "grad_norm": 8.289089484106746, "learning_rate": 7.845208038964562e-06, "loss": 17.8755, "step": 17950 }, { "epoch": 0.3281298554115561, "grad_norm": 6.832392891004793, "learning_rate": 7.844964621106518e-06, "loss": 17.6991, "step": 17951 }, { "epoch": 0.32814813460800263, "grad_norm": 5.67463719989777, "learning_rate": 7.844721193277118e-06, "loss": 17.4293, "step": 17952 }, { "epoch": 0.32816641380444916, "grad_norm": 6.881617909594826, "learning_rate": 7.84447775547722e-06, "loss": 17.6258, "step": 17953 }, { "epoch": 0.3281846930008957, "grad_norm": 6.887026728402564, "learning_rate": 7.844234307707673e-06, "loss": 17.658, "step": 17954 }, { "epoch": 0.3282029721973422, "grad_norm": 6.028614365375526, "learning_rate": 7.843990849969332e-06, "loss": 17.4576, "step": 17955 }, { "epoch": 0.3282212513937887, "grad_norm": 6.630595707426664, "learning_rate": 7.84374738226305e-06, "loss": 17.7347, "step": 17956 }, { "epoch": 0.32823953059023525, "grad_norm": 6.54165765514594, "learning_rate": 7.843503904589678e-06, "loss": 17.6441, "step": 17957 }, { "epoch": 0.3282578097866818, "grad_norm": 7.652646522711401, "learning_rate": 7.843260416950073e-06, "loss": 18.535, "step": 17958 }, { "epoch": 0.3282760889831283, "grad_norm": 6.696123866533352, "learning_rate": 7.843016919345088e-06, "loss": 17.6987, "step": 17959 }, { "epoch": 0.3282943681795748, "grad_norm": 6.198056922095434, "learning_rate": 7.842773411775575e-06, "loss": 17.2978, "step": 17960 }, { "epoch": 0.32831264737602134, "grad_norm": 9.442932381923036, "learning_rate": 7.842529894242387e-06, "loss": 18.1727, "step": 17961 }, { "epoch": 0.3283309265724679, "grad_norm": 6.890377807218648, "learning_rate": 7.84228636674638e-06, "loss": 17.2305, "step": 17962 }, { "epoch": 0.3283492057689144, "grad_norm": 7.8386009702386445, "learning_rate": 7.842042829288405e-06, "loss": 18.2672, "step": 17963 }, { "epoch": 0.32836748496536095, "grad_norm": 5.3862193100565, "learning_rate": 7.841799281869316e-06, "loss": 16.9004, "step": 17964 }, { "epoch": 0.3283857641618074, "grad_norm": 6.426333063664287, "learning_rate": 7.841555724489968e-06, "loss": 17.3328, "step": 17965 }, { "epoch": 0.32840404335825396, "grad_norm": 6.454542323521426, "learning_rate": 7.841312157151214e-06, "loss": 17.7325, "step": 17966 }, { "epoch": 0.3284223225547005, "grad_norm": 6.364717940158, "learning_rate": 7.841068579853905e-06, "loss": 17.4542, "step": 17967 }, { "epoch": 0.32844060175114703, "grad_norm": 8.143825905159051, "learning_rate": 7.8408249925989e-06, "loss": 18.5515, "step": 17968 }, { "epoch": 0.32845888094759357, "grad_norm": 6.231896144861432, "learning_rate": 7.840581395387049e-06, "loss": 17.5316, "step": 17969 }, { "epoch": 0.32847716014404005, "grad_norm": 6.312618339014489, "learning_rate": 7.840337788219206e-06, "loss": 17.3823, "step": 17970 }, { "epoch": 0.3284954393404866, "grad_norm": 7.612973403971207, "learning_rate": 7.840094171096227e-06, "loss": 17.9676, "step": 17971 }, { "epoch": 0.3285137185369331, "grad_norm": 7.0474817026375955, "learning_rate": 7.839850544018963e-06, "loss": 17.5598, "step": 17972 }, { "epoch": 0.32853199773337965, "grad_norm": 7.233474172101555, "learning_rate": 7.839606906988269e-06, "loss": 17.6773, "step": 17973 }, { "epoch": 0.3285502769298262, "grad_norm": 7.354578322300646, "learning_rate": 7.839363260005e-06, "loss": 18.029, "step": 17974 }, { "epoch": 0.32856855612627267, "grad_norm": 6.074226772905845, "learning_rate": 7.839119603070009e-06, "loss": 17.4109, "step": 17975 }, { "epoch": 0.3285868353227192, "grad_norm": 6.238306304417973, "learning_rate": 7.83887593618415e-06, "loss": 17.4351, "step": 17976 }, { "epoch": 0.32860511451916574, "grad_norm": 7.7263793962581655, "learning_rate": 7.838632259348277e-06, "loss": 18.1237, "step": 17977 }, { "epoch": 0.3286233937156123, "grad_norm": 7.757291919193158, "learning_rate": 7.838388572563245e-06, "loss": 18.1212, "step": 17978 }, { "epoch": 0.3286416729120588, "grad_norm": 5.371021798274012, "learning_rate": 7.838144875829907e-06, "loss": 17.1414, "step": 17979 }, { "epoch": 0.3286599521085053, "grad_norm": 5.787143922859928, "learning_rate": 7.837901169149118e-06, "loss": 17.484, "step": 17980 }, { "epoch": 0.32867823130495183, "grad_norm": 5.58306720618807, "learning_rate": 7.837657452521731e-06, "loss": 17.1153, "step": 17981 }, { "epoch": 0.32869651050139836, "grad_norm": 6.215042796250209, "learning_rate": 7.837413725948601e-06, "loss": 17.2298, "step": 17982 }, { "epoch": 0.3287147896978449, "grad_norm": 6.972794338186463, "learning_rate": 7.837169989430582e-06, "loss": 17.7384, "step": 17983 }, { "epoch": 0.3287330688942914, "grad_norm": 5.574196475197414, "learning_rate": 7.836926242968528e-06, "loss": 17.1791, "step": 17984 }, { "epoch": 0.3287513480907379, "grad_norm": 6.929002251117667, "learning_rate": 7.836682486563296e-06, "loss": 17.7906, "step": 17985 }, { "epoch": 0.32876962728718445, "grad_norm": 6.137476365263559, "learning_rate": 7.836438720215736e-06, "loss": 17.0248, "step": 17986 }, { "epoch": 0.328787906483631, "grad_norm": 6.542484529930226, "learning_rate": 7.836194943926704e-06, "loss": 17.8205, "step": 17987 }, { "epoch": 0.3288061856800775, "grad_norm": 6.055576458847637, "learning_rate": 7.835951157697055e-06, "loss": 17.1962, "step": 17988 }, { "epoch": 0.328824464876524, "grad_norm": 5.382599772472619, "learning_rate": 7.835707361527644e-06, "loss": 17.0756, "step": 17989 }, { "epoch": 0.32884274407297054, "grad_norm": 8.919955577730727, "learning_rate": 7.835463555419324e-06, "loss": 18.5505, "step": 17990 }, { "epoch": 0.3288610232694171, "grad_norm": 6.047143064185945, "learning_rate": 7.835219739372952e-06, "loss": 17.4779, "step": 17991 }, { "epoch": 0.3288793024658636, "grad_norm": 7.568418447047422, "learning_rate": 7.83497591338938e-06, "loss": 17.8594, "step": 17992 }, { "epoch": 0.32889758166231015, "grad_norm": 6.442770499078313, "learning_rate": 7.834732077469463e-06, "loss": 17.6591, "step": 17993 }, { "epoch": 0.3289158608587566, "grad_norm": 5.93445607341471, "learning_rate": 7.834488231614056e-06, "loss": 17.4359, "step": 17994 }, { "epoch": 0.32893414005520316, "grad_norm": 5.694514030385999, "learning_rate": 7.834244375824013e-06, "loss": 17.1629, "step": 17995 }, { "epoch": 0.3289524192516497, "grad_norm": 7.673753865343592, "learning_rate": 7.834000510100192e-06, "loss": 17.9861, "step": 17996 }, { "epoch": 0.32897069844809623, "grad_norm": 5.615634763335271, "learning_rate": 7.833756634443442e-06, "loss": 17.3676, "step": 17997 }, { "epoch": 0.32898897764454277, "grad_norm": 6.668802536505096, "learning_rate": 7.833512748854622e-06, "loss": 17.5947, "step": 17998 }, { "epoch": 0.32900725684098925, "grad_norm": 6.918821112867181, "learning_rate": 7.833268853334587e-06, "loss": 17.8065, "step": 17999 }, { "epoch": 0.3290255360374358, "grad_norm": 6.840251713117992, "learning_rate": 7.83302494788419e-06, "loss": 17.9558, "step": 18000 }, { "epoch": 0.3290438152338823, "grad_norm": 6.965153888851557, "learning_rate": 7.832781032504285e-06, "loss": 17.3834, "step": 18001 }, { "epoch": 0.32906209443032886, "grad_norm": 6.2277298638382845, "learning_rate": 7.832537107195729e-06, "loss": 17.7506, "step": 18002 }, { "epoch": 0.3290803736267754, "grad_norm": 7.37426000360595, "learning_rate": 7.832293171959378e-06, "loss": 17.9153, "step": 18003 }, { "epoch": 0.32909865282322187, "grad_norm": 6.85056256944876, "learning_rate": 7.832049226796083e-06, "loss": 17.7379, "step": 18004 }, { "epoch": 0.3291169320196684, "grad_norm": 6.888727972434795, "learning_rate": 7.831805271706701e-06, "loss": 17.7978, "step": 18005 }, { "epoch": 0.32913521121611494, "grad_norm": 5.792005203325986, "learning_rate": 7.83156130669209e-06, "loss": 17.4617, "step": 18006 }, { "epoch": 0.3291534904125615, "grad_norm": 6.5035961323595215, "learning_rate": 7.831317331753099e-06, "loss": 17.6357, "step": 18007 }, { "epoch": 0.329171769609008, "grad_norm": 7.231552584455718, "learning_rate": 7.831073346890588e-06, "loss": 18.0991, "step": 18008 }, { "epoch": 0.3291900488054545, "grad_norm": 5.947293522693324, "learning_rate": 7.830829352105412e-06, "loss": 17.2566, "step": 18009 }, { "epoch": 0.32920832800190103, "grad_norm": 5.742591906200253, "learning_rate": 7.830585347398423e-06, "loss": 17.2973, "step": 18010 }, { "epoch": 0.32922660719834757, "grad_norm": 7.13675380760017, "learning_rate": 7.830341332770477e-06, "loss": 17.7959, "step": 18011 }, { "epoch": 0.3292448863947941, "grad_norm": 7.440563667754312, "learning_rate": 7.830097308222431e-06, "loss": 18.1438, "step": 18012 }, { "epoch": 0.32926316559124064, "grad_norm": 6.20282260730255, "learning_rate": 7.829853273755142e-06, "loss": 17.369, "step": 18013 }, { "epoch": 0.3292814447876871, "grad_norm": 5.730776245466025, "learning_rate": 7.82960922936946e-06, "loss": 17.1559, "step": 18014 }, { "epoch": 0.32929972398413365, "grad_norm": 6.100881290165984, "learning_rate": 7.829365175066244e-06, "loss": 17.5479, "step": 18015 }, { "epoch": 0.3293180031805802, "grad_norm": 6.817386754238109, "learning_rate": 7.82912111084635e-06, "loss": 17.8857, "step": 18016 }, { "epoch": 0.3293362823770267, "grad_norm": 7.652466360647009, "learning_rate": 7.828877036710629e-06, "loss": 17.7605, "step": 18017 }, { "epoch": 0.3293545615734732, "grad_norm": 6.025793143133479, "learning_rate": 7.828632952659942e-06, "loss": 17.3684, "step": 18018 }, { "epoch": 0.32937284076991974, "grad_norm": 5.6804646863167445, "learning_rate": 7.82838885869514e-06, "loss": 17.1107, "step": 18019 }, { "epoch": 0.3293911199663663, "grad_norm": 6.310527485746265, "learning_rate": 7.828144754817083e-06, "loss": 17.2968, "step": 18020 }, { "epoch": 0.3294093991628128, "grad_norm": 6.532576936114714, "learning_rate": 7.827900641026622e-06, "loss": 17.3347, "step": 18021 }, { "epoch": 0.32942767835925935, "grad_norm": 5.82792366400588, "learning_rate": 7.827656517324616e-06, "loss": 17.3119, "step": 18022 }, { "epoch": 0.3294459575557058, "grad_norm": 6.7218850160504235, "learning_rate": 7.827412383711919e-06, "loss": 17.6072, "step": 18023 }, { "epoch": 0.32946423675215236, "grad_norm": 5.941080781099494, "learning_rate": 7.827168240189385e-06, "loss": 17.3296, "step": 18024 }, { "epoch": 0.3294825159485989, "grad_norm": 5.940669637323221, "learning_rate": 7.826924086757873e-06, "loss": 17.3381, "step": 18025 }, { "epoch": 0.32950079514504543, "grad_norm": 6.334301727038886, "learning_rate": 7.826679923418238e-06, "loss": 17.4331, "step": 18026 }, { "epoch": 0.32951907434149197, "grad_norm": 6.434928643502983, "learning_rate": 7.826435750171336e-06, "loss": 17.6518, "step": 18027 }, { "epoch": 0.32953735353793845, "grad_norm": 5.795639760284889, "learning_rate": 7.82619156701802e-06, "loss": 17.3047, "step": 18028 }, { "epoch": 0.329555632734385, "grad_norm": 6.196272877079253, "learning_rate": 7.825947373959147e-06, "loss": 17.5228, "step": 18029 }, { "epoch": 0.3295739119308315, "grad_norm": 5.278615878184858, "learning_rate": 7.825703170995576e-06, "loss": 17.0388, "step": 18030 }, { "epoch": 0.32959219112727806, "grad_norm": 7.92969487264405, "learning_rate": 7.82545895812816e-06, "loss": 17.9639, "step": 18031 }, { "epoch": 0.3296104703237246, "grad_norm": 7.482819457997297, "learning_rate": 7.825214735357754e-06, "loss": 17.8671, "step": 18032 }, { "epoch": 0.32962874952017107, "grad_norm": 5.786874699951977, "learning_rate": 7.824970502685218e-06, "loss": 17.2433, "step": 18033 }, { "epoch": 0.3296470287166176, "grad_norm": 6.134673803576229, "learning_rate": 7.824726260111403e-06, "loss": 17.4821, "step": 18034 }, { "epoch": 0.32966530791306414, "grad_norm": 6.769792504409528, "learning_rate": 7.824482007637171e-06, "loss": 17.7747, "step": 18035 }, { "epoch": 0.3296835871095107, "grad_norm": 6.461015525984091, "learning_rate": 7.824237745263372e-06, "loss": 17.3478, "step": 18036 }, { "epoch": 0.3297018663059572, "grad_norm": 8.139266660952575, "learning_rate": 7.823993472990866e-06, "loss": 18.0593, "step": 18037 }, { "epoch": 0.3297201455024037, "grad_norm": 5.728976451364784, "learning_rate": 7.823749190820507e-06, "loss": 17.1549, "step": 18038 }, { "epoch": 0.32973842469885023, "grad_norm": 6.000148853719646, "learning_rate": 7.823504898753152e-06, "loss": 17.5285, "step": 18039 }, { "epoch": 0.32975670389529677, "grad_norm": 5.398455405227888, "learning_rate": 7.823260596789657e-06, "loss": 17.1715, "step": 18040 }, { "epoch": 0.3297749830917433, "grad_norm": 7.0204903803390675, "learning_rate": 7.82301628493088e-06, "loss": 17.6809, "step": 18041 }, { "epoch": 0.32979326228818984, "grad_norm": 6.668100476576282, "learning_rate": 7.822771963177676e-06, "loss": 17.872, "step": 18042 }, { "epoch": 0.3298115414846363, "grad_norm": 7.371328529126587, "learning_rate": 7.822527631530898e-06, "loss": 18.0666, "step": 18043 }, { "epoch": 0.32982982068108285, "grad_norm": 8.178018564594312, "learning_rate": 7.822283289991411e-06, "loss": 18.3436, "step": 18044 }, { "epoch": 0.3298480998775294, "grad_norm": 11.580222952895815, "learning_rate": 7.822038938560061e-06, "loss": 18.7833, "step": 18045 }, { "epoch": 0.3298663790739759, "grad_norm": 7.480236483365055, "learning_rate": 7.82179457723771e-06, "loss": 17.4975, "step": 18046 }, { "epoch": 0.32988465827042246, "grad_norm": 6.546495003471312, "learning_rate": 7.821550206025218e-06, "loss": 17.6675, "step": 18047 }, { "epoch": 0.32990293746686894, "grad_norm": 7.540433245344673, "learning_rate": 7.821305824923434e-06, "loss": 17.9526, "step": 18048 }, { "epoch": 0.3299212166633155, "grad_norm": 6.398733916800262, "learning_rate": 7.821061433933217e-06, "loss": 17.98, "step": 18049 }, { "epoch": 0.329939495859762, "grad_norm": 6.002220326238243, "learning_rate": 7.820817033055426e-06, "loss": 17.5718, "step": 18050 }, { "epoch": 0.32995777505620855, "grad_norm": 5.524521422577539, "learning_rate": 7.820572622290916e-06, "loss": 17.1192, "step": 18051 }, { "epoch": 0.329976054252655, "grad_norm": 7.543780459250276, "learning_rate": 7.820328201640545e-06, "loss": 17.9918, "step": 18052 }, { "epoch": 0.32999433344910156, "grad_norm": 7.530696739431696, "learning_rate": 7.820083771105166e-06, "loss": 18.4403, "step": 18053 }, { "epoch": 0.3300126126455481, "grad_norm": 6.880084319685398, "learning_rate": 7.819839330685641e-06, "loss": 17.7529, "step": 18054 }, { "epoch": 0.33003089184199463, "grad_norm": 5.9957187798217, "learning_rate": 7.819594880382822e-06, "loss": 17.5537, "step": 18055 }, { "epoch": 0.33004917103844117, "grad_norm": 8.015415195987858, "learning_rate": 7.819350420197566e-06, "loss": 18.0474, "step": 18056 }, { "epoch": 0.33006745023488765, "grad_norm": 6.748262435062119, "learning_rate": 7.819105950130734e-06, "loss": 17.699, "step": 18057 }, { "epoch": 0.3300857294313342, "grad_norm": 6.842770845866191, "learning_rate": 7.81886147018318e-06, "loss": 17.9296, "step": 18058 }, { "epoch": 0.3301040086277807, "grad_norm": 7.048283201148179, "learning_rate": 7.818616980355759e-06, "loss": 18.0855, "step": 18059 }, { "epoch": 0.33012228782422726, "grad_norm": 5.679185944674213, "learning_rate": 7.818372480649332e-06, "loss": 17.4917, "step": 18060 }, { "epoch": 0.3301405670206738, "grad_norm": 8.87379436962972, "learning_rate": 7.818127971064755e-06, "loss": 18.5461, "step": 18061 }, { "epoch": 0.33015884621712027, "grad_norm": 7.260242380332696, "learning_rate": 7.817883451602884e-06, "loss": 17.8184, "step": 18062 }, { "epoch": 0.3301771254135668, "grad_norm": 6.120923486287874, "learning_rate": 7.817638922264572e-06, "loss": 17.292, "step": 18063 }, { "epoch": 0.33019540461001334, "grad_norm": 6.204902826118205, "learning_rate": 7.817394383050683e-06, "loss": 17.5321, "step": 18064 }, { "epoch": 0.3302136838064599, "grad_norm": 6.371457833218362, "learning_rate": 7.817149833962073e-06, "loss": 17.5532, "step": 18065 }, { "epoch": 0.3302319630029064, "grad_norm": 6.202345362612025, "learning_rate": 7.816905274999594e-06, "loss": 17.5803, "step": 18066 }, { "epoch": 0.3302502421993529, "grad_norm": 5.772474384306617, "learning_rate": 7.816660706164107e-06, "loss": 17.3038, "step": 18067 }, { "epoch": 0.33026852139579943, "grad_norm": 6.2731523091285215, "learning_rate": 7.81641612745647e-06, "loss": 17.4522, "step": 18068 }, { "epoch": 0.33028680059224597, "grad_norm": 6.030460793230085, "learning_rate": 7.81617153887754e-06, "loss": 17.4709, "step": 18069 }, { "epoch": 0.3303050797886925, "grad_norm": 7.152867573673834, "learning_rate": 7.815926940428169e-06, "loss": 18.0158, "step": 18070 }, { "epoch": 0.33032335898513904, "grad_norm": 6.988290298304303, "learning_rate": 7.815682332109221e-06, "loss": 18.0304, "step": 18071 }, { "epoch": 0.3303416381815855, "grad_norm": 5.60823683949017, "learning_rate": 7.815437713921553e-06, "loss": 17.0719, "step": 18072 }, { "epoch": 0.33035991737803205, "grad_norm": 6.508456643510161, "learning_rate": 7.815193085866017e-06, "loss": 17.7962, "step": 18073 }, { "epoch": 0.3303781965744786, "grad_norm": 5.333315064822724, "learning_rate": 7.814948447943474e-06, "loss": 17.0398, "step": 18074 }, { "epoch": 0.3303964757709251, "grad_norm": 6.06665473651248, "learning_rate": 7.814703800154781e-06, "loss": 17.2698, "step": 18075 }, { "epoch": 0.33041475496737166, "grad_norm": 6.011162302757721, "learning_rate": 7.814459142500795e-06, "loss": 17.2969, "step": 18076 }, { "epoch": 0.33043303416381814, "grad_norm": 6.284279519153075, "learning_rate": 7.814214474982374e-06, "loss": 17.3795, "step": 18077 }, { "epoch": 0.3304513133602647, "grad_norm": 6.412640893166762, "learning_rate": 7.813969797600377e-06, "loss": 17.8153, "step": 18078 }, { "epoch": 0.3304695925567112, "grad_norm": 6.086706312108125, "learning_rate": 7.813725110355658e-06, "loss": 17.514, "step": 18079 }, { "epoch": 0.33048787175315775, "grad_norm": 6.369750439259867, "learning_rate": 7.813480413249078e-06, "loss": 17.4404, "step": 18080 }, { "epoch": 0.3305061509496043, "grad_norm": 6.714898930401324, "learning_rate": 7.813235706281493e-06, "loss": 17.7209, "step": 18081 }, { "epoch": 0.33052443014605076, "grad_norm": 5.085908452935506, "learning_rate": 7.812990989453762e-06, "loss": 17.2132, "step": 18082 }, { "epoch": 0.3305427093424973, "grad_norm": 7.636258516957033, "learning_rate": 7.81274626276674e-06, "loss": 18.3384, "step": 18083 }, { "epoch": 0.33056098853894383, "grad_norm": 6.3478744830122045, "learning_rate": 7.812501526221286e-06, "loss": 17.6347, "step": 18084 }, { "epoch": 0.33057926773539037, "grad_norm": 6.298719686003601, "learning_rate": 7.812256779818262e-06, "loss": 17.2591, "step": 18085 }, { "epoch": 0.33059754693183685, "grad_norm": 6.323332639605773, "learning_rate": 7.812012023558517e-06, "loss": 17.4948, "step": 18086 }, { "epoch": 0.3306158261282834, "grad_norm": 6.769834675958703, "learning_rate": 7.811767257442917e-06, "loss": 17.6773, "step": 18087 }, { "epoch": 0.3306341053247299, "grad_norm": 7.566896663553592, "learning_rate": 7.811522481472316e-06, "loss": 18.1479, "step": 18088 }, { "epoch": 0.33065238452117646, "grad_norm": 7.349644500255119, "learning_rate": 7.811277695647573e-06, "loss": 17.9977, "step": 18089 }, { "epoch": 0.330670663717623, "grad_norm": 6.247972811162064, "learning_rate": 7.811032899969545e-06, "loss": 17.5106, "step": 18090 }, { "epoch": 0.3306889429140695, "grad_norm": 8.004581912619546, "learning_rate": 7.81078809443909e-06, "loss": 18.3166, "step": 18091 }, { "epoch": 0.330707222110516, "grad_norm": 7.109596586974475, "learning_rate": 7.810543279057068e-06, "loss": 17.8664, "step": 18092 }, { "epoch": 0.33072550130696254, "grad_norm": 5.738651723051288, "learning_rate": 7.810298453824336e-06, "loss": 17.3457, "step": 18093 }, { "epoch": 0.3307437805034091, "grad_norm": 7.220865070391188, "learning_rate": 7.81005361874175e-06, "loss": 17.6067, "step": 18094 }, { "epoch": 0.3307620596998556, "grad_norm": 8.148402427707252, "learning_rate": 7.80980877381017e-06, "loss": 18.197, "step": 18095 }, { "epoch": 0.3307803388963021, "grad_norm": 5.588565291030363, "learning_rate": 7.809563919030456e-06, "loss": 17.1872, "step": 18096 }, { "epoch": 0.33079861809274863, "grad_norm": 6.75662550492222, "learning_rate": 7.809319054403463e-06, "loss": 17.1131, "step": 18097 }, { "epoch": 0.33081689728919517, "grad_norm": 7.431033782768452, "learning_rate": 7.80907417993005e-06, "loss": 17.975, "step": 18098 }, { "epoch": 0.3308351764856417, "grad_norm": 7.685822219913446, "learning_rate": 7.808829295611078e-06, "loss": 17.7454, "step": 18099 }, { "epoch": 0.33085345568208824, "grad_norm": 6.228433468252308, "learning_rate": 7.8085844014474e-06, "loss": 17.2792, "step": 18100 }, { "epoch": 0.3308717348785347, "grad_norm": 7.236931279106639, "learning_rate": 7.808339497439881e-06, "loss": 17.8093, "step": 18101 }, { "epoch": 0.33089001407498125, "grad_norm": 4.747904350524478, "learning_rate": 7.808094583589372e-06, "loss": 16.8722, "step": 18102 }, { "epoch": 0.3309082932714278, "grad_norm": 5.943061297250326, "learning_rate": 7.807849659896738e-06, "loss": 17.5772, "step": 18103 }, { "epoch": 0.3309265724678743, "grad_norm": 8.433176466603951, "learning_rate": 7.807604726362833e-06, "loss": 18.0154, "step": 18104 }, { "epoch": 0.33094485166432086, "grad_norm": 7.371157211341919, "learning_rate": 7.807359782988517e-06, "loss": 17.5609, "step": 18105 }, { "epoch": 0.33096313086076734, "grad_norm": 7.081159680112228, "learning_rate": 7.80711482977465e-06, "loss": 17.9772, "step": 18106 }, { "epoch": 0.3309814100572139, "grad_norm": 8.509748744179188, "learning_rate": 7.806869866722087e-06, "loss": 18.6014, "step": 18107 }, { "epoch": 0.3309996892536604, "grad_norm": 6.176470547847755, "learning_rate": 7.806624893831692e-06, "loss": 17.4069, "step": 18108 }, { "epoch": 0.33101796845010695, "grad_norm": 5.95420010238592, "learning_rate": 7.806379911104316e-06, "loss": 17.4311, "step": 18109 }, { "epoch": 0.3310362476465535, "grad_norm": 5.677335869781743, "learning_rate": 7.806134918540825e-06, "loss": 17.4766, "step": 18110 }, { "epoch": 0.33105452684299996, "grad_norm": 5.634643249307986, "learning_rate": 7.805889916142073e-06, "loss": 17.2223, "step": 18111 }, { "epoch": 0.3310728060394465, "grad_norm": 6.59916300705919, "learning_rate": 7.805644903908922e-06, "loss": 17.4556, "step": 18112 }, { "epoch": 0.33109108523589303, "grad_norm": 5.978686613423198, "learning_rate": 7.805399881842227e-06, "loss": 17.1401, "step": 18113 }, { "epoch": 0.33110936443233957, "grad_norm": 6.213357174843528, "learning_rate": 7.805154849942851e-06, "loss": 17.2441, "step": 18114 }, { "epoch": 0.3311276436287861, "grad_norm": 6.805620757747374, "learning_rate": 7.804909808211649e-06, "loss": 17.5737, "step": 18115 }, { "epoch": 0.3311459228252326, "grad_norm": 6.61973807849275, "learning_rate": 7.804664756649483e-06, "loss": 17.7328, "step": 18116 }, { "epoch": 0.3311642020216791, "grad_norm": 7.081637541097287, "learning_rate": 7.80441969525721e-06, "loss": 18.1021, "step": 18117 }, { "epoch": 0.33118248121812566, "grad_norm": 6.1570293838340024, "learning_rate": 7.804174624035687e-06, "loss": 17.2987, "step": 18118 }, { "epoch": 0.3312007604145722, "grad_norm": 7.137962111086096, "learning_rate": 7.803929542985778e-06, "loss": 18.09, "step": 18119 }, { "epoch": 0.3312190396110187, "grad_norm": 6.630613522949249, "learning_rate": 7.80368445210834e-06, "loss": 17.2999, "step": 18120 }, { "epoch": 0.3312373188074652, "grad_norm": 6.5951159747846795, "learning_rate": 7.80343935140423e-06, "loss": 17.5798, "step": 18121 }, { "epoch": 0.33125559800391174, "grad_norm": 5.678767297055565, "learning_rate": 7.803194240874307e-06, "loss": 17.2323, "step": 18122 }, { "epoch": 0.3312738772003583, "grad_norm": 6.177090873093481, "learning_rate": 7.802949120519433e-06, "loss": 17.4886, "step": 18123 }, { "epoch": 0.3312921563968048, "grad_norm": 6.948735445214337, "learning_rate": 7.802703990340465e-06, "loss": 17.9592, "step": 18124 }, { "epoch": 0.3313104355932513, "grad_norm": 6.245303136428903, "learning_rate": 7.802458850338262e-06, "loss": 17.5357, "step": 18125 }, { "epoch": 0.33132871478969783, "grad_norm": 5.0947277965446975, "learning_rate": 7.802213700513686e-06, "loss": 16.7877, "step": 18126 }, { "epoch": 0.33134699398614437, "grad_norm": 6.178992615043774, "learning_rate": 7.801968540867593e-06, "loss": 17.5462, "step": 18127 }, { "epoch": 0.3313652731825909, "grad_norm": 6.400873647242991, "learning_rate": 7.801723371400842e-06, "loss": 17.5993, "step": 18128 }, { "epoch": 0.33138355237903744, "grad_norm": 7.180080291465306, "learning_rate": 7.801478192114294e-06, "loss": 17.7825, "step": 18129 }, { "epoch": 0.3314018315754839, "grad_norm": 8.426975854213334, "learning_rate": 7.80123300300881e-06, "loss": 18.0174, "step": 18130 }, { "epoch": 0.33142011077193045, "grad_norm": 9.678813909641246, "learning_rate": 7.800987804085248e-06, "loss": 18.6294, "step": 18131 }, { "epoch": 0.331438389968377, "grad_norm": 6.117208571758629, "learning_rate": 7.800742595344464e-06, "loss": 17.3539, "step": 18132 }, { "epoch": 0.3314566691648235, "grad_norm": 8.796654775465436, "learning_rate": 7.800497376787322e-06, "loss": 18.2088, "step": 18133 }, { "epoch": 0.33147494836127006, "grad_norm": 6.591865191776443, "learning_rate": 7.80025214841468e-06, "loss": 17.7186, "step": 18134 }, { "epoch": 0.33149322755771654, "grad_norm": 12.247619296007418, "learning_rate": 7.800006910227395e-06, "loss": 18.1093, "step": 18135 }, { "epoch": 0.3315115067541631, "grad_norm": 6.145603212567427, "learning_rate": 7.79976166222633e-06, "loss": 17.3947, "step": 18136 }, { "epoch": 0.3315297859506096, "grad_norm": 5.775018551611026, "learning_rate": 7.799516404412344e-06, "loss": 17.327, "step": 18137 }, { "epoch": 0.33154806514705615, "grad_norm": 7.1323289170806365, "learning_rate": 7.799271136786294e-06, "loss": 17.8676, "step": 18138 }, { "epoch": 0.3315663443435027, "grad_norm": 6.461974829962352, "learning_rate": 7.799025859349043e-06, "loss": 17.6025, "step": 18139 }, { "epoch": 0.33158462353994916, "grad_norm": 7.838103949478371, "learning_rate": 7.79878057210145e-06, "loss": 18.3584, "step": 18140 }, { "epoch": 0.3316029027363957, "grad_norm": 8.413702494573815, "learning_rate": 7.798535275044374e-06, "loss": 18.6791, "step": 18141 }, { "epoch": 0.33162118193284223, "grad_norm": 5.8768644266598145, "learning_rate": 7.798289968178674e-06, "loss": 17.3626, "step": 18142 }, { "epoch": 0.33163946112928877, "grad_norm": 5.994574320590915, "learning_rate": 7.79804465150521e-06, "loss": 17.5051, "step": 18143 }, { "epoch": 0.3316577403257353, "grad_norm": 6.586087063590682, "learning_rate": 7.797799325024842e-06, "loss": 17.736, "step": 18144 }, { "epoch": 0.3316760195221818, "grad_norm": 5.638852904875247, "learning_rate": 7.797553988738432e-06, "loss": 17.2101, "step": 18145 }, { "epoch": 0.3316942987186283, "grad_norm": 5.736074742061226, "learning_rate": 7.797308642646836e-06, "loss": 17.1432, "step": 18146 }, { "epoch": 0.33171257791507486, "grad_norm": 7.090918000663191, "learning_rate": 7.797063286750916e-06, "loss": 17.6971, "step": 18147 }, { "epoch": 0.3317308571115214, "grad_norm": 6.206276526643924, "learning_rate": 7.796817921051534e-06, "loss": 17.3684, "step": 18148 }, { "epoch": 0.33174913630796793, "grad_norm": 6.436787675555288, "learning_rate": 7.796572545549546e-06, "loss": 17.7381, "step": 18149 }, { "epoch": 0.3317674155044144, "grad_norm": 5.870395325684857, "learning_rate": 7.796327160245814e-06, "loss": 17.257, "step": 18150 }, { "epoch": 0.33178569470086094, "grad_norm": 7.163311270417848, "learning_rate": 7.796081765141198e-06, "loss": 17.813, "step": 18151 }, { "epoch": 0.3318039738973075, "grad_norm": 6.897218684159557, "learning_rate": 7.795836360236559e-06, "loss": 17.6345, "step": 18152 }, { "epoch": 0.331822253093754, "grad_norm": 6.524512327566224, "learning_rate": 7.795590945532757e-06, "loss": 17.5572, "step": 18153 }, { "epoch": 0.3318405322902005, "grad_norm": 6.146439310580656, "learning_rate": 7.79534552103065e-06, "loss": 17.2985, "step": 18154 }, { "epoch": 0.33185881148664703, "grad_norm": 6.068391168257, "learning_rate": 7.7951000867311e-06, "loss": 17.6694, "step": 18155 }, { "epoch": 0.33187709068309357, "grad_norm": 7.338505430554289, "learning_rate": 7.794854642634964e-06, "loss": 17.6184, "step": 18156 }, { "epoch": 0.3318953698795401, "grad_norm": 6.143827448923468, "learning_rate": 7.794609188743108e-06, "loss": 17.3543, "step": 18157 }, { "epoch": 0.33191364907598664, "grad_norm": 7.009805038282566, "learning_rate": 7.79436372505639e-06, "loss": 17.7452, "step": 18158 }, { "epoch": 0.3319319282724331, "grad_norm": 5.642670319090825, "learning_rate": 7.794118251575666e-06, "loss": 17.3011, "step": 18159 }, { "epoch": 0.33195020746887965, "grad_norm": 7.102703592615145, "learning_rate": 7.793872768301802e-06, "loss": 17.8441, "step": 18160 }, { "epoch": 0.3319684866653262, "grad_norm": 7.125349431069596, "learning_rate": 7.793627275235658e-06, "loss": 17.5985, "step": 18161 }, { "epoch": 0.3319867658617727, "grad_norm": 5.280807767518307, "learning_rate": 7.793381772378091e-06, "loss": 16.9641, "step": 18162 }, { "epoch": 0.33200504505821926, "grad_norm": 7.904987467500512, "learning_rate": 7.793136259729963e-06, "loss": 18.7803, "step": 18163 }, { "epoch": 0.33202332425466574, "grad_norm": 5.918237343746448, "learning_rate": 7.792890737292135e-06, "loss": 17.1779, "step": 18164 }, { "epoch": 0.3320416034511123, "grad_norm": 6.076838668528555, "learning_rate": 7.792645205065469e-06, "loss": 17.3053, "step": 18165 }, { "epoch": 0.3320598826475588, "grad_norm": 5.958843514612012, "learning_rate": 7.792399663050822e-06, "loss": 17.4298, "step": 18166 }, { "epoch": 0.33207816184400535, "grad_norm": 6.130864054531944, "learning_rate": 7.792154111249057e-06, "loss": 17.1949, "step": 18167 }, { "epoch": 0.3320964410404519, "grad_norm": 7.954768819297425, "learning_rate": 7.791908549661036e-06, "loss": 18.3735, "step": 18168 }, { "epoch": 0.33211472023689836, "grad_norm": 8.298811300794295, "learning_rate": 7.791662978287616e-06, "loss": 18.0898, "step": 18169 }, { "epoch": 0.3321329994333449, "grad_norm": 6.303685131205644, "learning_rate": 7.791417397129659e-06, "loss": 17.1811, "step": 18170 }, { "epoch": 0.33215127862979144, "grad_norm": 7.206124607693227, "learning_rate": 7.791171806188027e-06, "loss": 18.0821, "step": 18171 }, { "epoch": 0.33216955782623797, "grad_norm": 8.314122606992065, "learning_rate": 7.79092620546358e-06, "loss": 17.6907, "step": 18172 }, { "epoch": 0.3321878370226845, "grad_norm": 5.033311332280101, "learning_rate": 7.790680594957179e-06, "loss": 16.9447, "step": 18173 }, { "epoch": 0.332206116219131, "grad_norm": 6.494315316107332, "learning_rate": 7.790434974669685e-06, "loss": 17.2337, "step": 18174 }, { "epoch": 0.3322243954155775, "grad_norm": 6.53056235395755, "learning_rate": 7.790189344601957e-06, "loss": 17.7898, "step": 18175 }, { "epoch": 0.33224267461202406, "grad_norm": 6.62133568231179, "learning_rate": 7.789943704754859e-06, "loss": 17.6237, "step": 18176 }, { "epoch": 0.3322609538084706, "grad_norm": 7.443861748373182, "learning_rate": 7.789698055129248e-06, "loss": 17.912, "step": 18177 }, { "epoch": 0.33227923300491713, "grad_norm": 6.973394307184902, "learning_rate": 7.789452395725991e-06, "loss": 17.7439, "step": 18178 }, { "epoch": 0.3322975122013636, "grad_norm": 6.785497824905413, "learning_rate": 7.789206726545944e-06, "loss": 17.5712, "step": 18179 }, { "epoch": 0.33231579139781015, "grad_norm": 5.727668255250328, "learning_rate": 7.788961047589968e-06, "loss": 17.3711, "step": 18180 }, { "epoch": 0.3323340705942567, "grad_norm": 7.234093853885038, "learning_rate": 7.788715358858927e-06, "loss": 17.8033, "step": 18181 }, { "epoch": 0.3323523497907032, "grad_norm": 7.431557595833951, "learning_rate": 7.78846966035368e-06, "loss": 18.0144, "step": 18182 }, { "epoch": 0.33237062898714975, "grad_norm": 5.669439493474292, "learning_rate": 7.78822395207509e-06, "loss": 17.175, "step": 18183 }, { "epoch": 0.33238890818359623, "grad_norm": 5.84385140341315, "learning_rate": 7.787978234024014e-06, "loss": 17.4927, "step": 18184 }, { "epoch": 0.33240718738004277, "grad_norm": 6.780583032607836, "learning_rate": 7.78773250620132e-06, "loss": 17.8106, "step": 18185 }, { "epoch": 0.3324254665764893, "grad_norm": 7.255141407721134, "learning_rate": 7.787486768607864e-06, "loss": 17.8612, "step": 18186 }, { "epoch": 0.33244374577293584, "grad_norm": 6.983175297532281, "learning_rate": 7.787241021244509e-06, "loss": 17.8476, "step": 18187 }, { "epoch": 0.3324620249693823, "grad_norm": 7.714506912999816, "learning_rate": 7.786995264112113e-06, "loss": 17.8431, "step": 18188 }, { "epoch": 0.33248030416582885, "grad_norm": 6.635305280111997, "learning_rate": 7.786749497211545e-06, "loss": 17.844, "step": 18189 }, { "epoch": 0.3324985833622754, "grad_norm": 6.495138931650477, "learning_rate": 7.78650372054366e-06, "loss": 17.572, "step": 18190 }, { "epoch": 0.3325168625587219, "grad_norm": 6.49733321429841, "learning_rate": 7.786257934109321e-06, "loss": 17.6245, "step": 18191 }, { "epoch": 0.33253514175516846, "grad_norm": 7.198187102241127, "learning_rate": 7.78601213790939e-06, "loss": 17.7093, "step": 18192 }, { "epoch": 0.33255342095161494, "grad_norm": 6.14606092043527, "learning_rate": 7.785766331944729e-06, "loss": 17.6105, "step": 18193 }, { "epoch": 0.3325717001480615, "grad_norm": 5.960404881106066, "learning_rate": 7.785520516216196e-06, "loss": 17.5927, "step": 18194 }, { "epoch": 0.332589979344508, "grad_norm": 6.351337550024658, "learning_rate": 7.785274690724657e-06, "loss": 17.8266, "step": 18195 }, { "epoch": 0.33260825854095455, "grad_norm": 5.717737567289761, "learning_rate": 7.785028855470973e-06, "loss": 17.3423, "step": 18196 }, { "epoch": 0.3326265377374011, "grad_norm": 8.886182938977878, "learning_rate": 7.784783010456002e-06, "loss": 18.6777, "step": 18197 }, { "epoch": 0.33264481693384756, "grad_norm": 6.677696799615567, "learning_rate": 7.784537155680611e-06, "loss": 17.6191, "step": 18198 }, { "epoch": 0.3326630961302941, "grad_norm": 5.338467567289512, "learning_rate": 7.784291291145657e-06, "loss": 16.9954, "step": 18199 }, { "epoch": 0.33268137532674064, "grad_norm": 5.759579456797714, "learning_rate": 7.784045416852007e-06, "loss": 17.2598, "step": 18200 }, { "epoch": 0.33269965452318717, "grad_norm": 6.395268915261202, "learning_rate": 7.783799532800516e-06, "loss": 17.4145, "step": 18201 }, { "epoch": 0.3327179337196337, "grad_norm": 6.0361347411754505, "learning_rate": 7.78355363899205e-06, "loss": 17.2654, "step": 18202 }, { "epoch": 0.3327362129160802, "grad_norm": 6.208967974543009, "learning_rate": 7.78330773542747e-06, "loss": 17.4756, "step": 18203 }, { "epoch": 0.3327544921125267, "grad_norm": 7.052348690253904, "learning_rate": 7.783061822107637e-06, "loss": 17.6805, "step": 18204 }, { "epoch": 0.33277277130897326, "grad_norm": 8.153610445614932, "learning_rate": 7.782815899033415e-06, "loss": 18.003, "step": 18205 }, { "epoch": 0.3327910505054198, "grad_norm": 6.652723108848424, "learning_rate": 7.782569966205664e-06, "loss": 17.6864, "step": 18206 }, { "epoch": 0.33280932970186633, "grad_norm": 7.3738920706327615, "learning_rate": 7.782324023625247e-06, "loss": 17.7472, "step": 18207 }, { "epoch": 0.3328276088983128, "grad_norm": 5.475350667601087, "learning_rate": 7.782078071293026e-06, "loss": 17.132, "step": 18208 }, { "epoch": 0.33284588809475935, "grad_norm": 6.62703705335512, "learning_rate": 7.781832109209864e-06, "loss": 17.2494, "step": 18209 }, { "epoch": 0.3328641672912059, "grad_norm": 5.713141076718879, "learning_rate": 7.78158613737662e-06, "loss": 17.2371, "step": 18210 }, { "epoch": 0.3328824464876524, "grad_norm": 5.889611034039129, "learning_rate": 7.781340155794159e-06, "loss": 17.2982, "step": 18211 }, { "epoch": 0.33290072568409895, "grad_norm": 6.832460942448049, "learning_rate": 7.78109416446334e-06, "loss": 17.4735, "step": 18212 }, { "epoch": 0.33291900488054543, "grad_norm": 7.148140644955894, "learning_rate": 7.78084816338503e-06, "loss": 17.9011, "step": 18213 }, { "epoch": 0.33293728407699197, "grad_norm": 6.981509351000349, "learning_rate": 7.780602152560089e-06, "loss": 17.4855, "step": 18214 }, { "epoch": 0.3329555632734385, "grad_norm": 5.84717279478008, "learning_rate": 7.780356131989375e-06, "loss": 17.1238, "step": 18215 }, { "epoch": 0.33297384246988504, "grad_norm": 6.5923059057619104, "learning_rate": 7.780110101673758e-06, "loss": 17.6541, "step": 18216 }, { "epoch": 0.3329921216663316, "grad_norm": 7.385514389434467, "learning_rate": 7.779864061614094e-06, "loss": 17.5661, "step": 18217 }, { "epoch": 0.33301040086277806, "grad_norm": 6.295521554971102, "learning_rate": 7.779618011811248e-06, "loss": 17.6539, "step": 18218 }, { "epoch": 0.3330286800592246, "grad_norm": 6.767173193562697, "learning_rate": 7.779371952266082e-06, "loss": 17.3043, "step": 18219 }, { "epoch": 0.3330469592556711, "grad_norm": 5.589786289988833, "learning_rate": 7.779125882979458e-06, "loss": 17.3223, "step": 18220 }, { "epoch": 0.33306523845211766, "grad_norm": 7.416346672681761, "learning_rate": 7.778879803952242e-06, "loss": 17.8023, "step": 18221 }, { "epoch": 0.33308351764856414, "grad_norm": 5.774653161170778, "learning_rate": 7.77863371518529e-06, "loss": 17.1647, "step": 18222 }, { "epoch": 0.3331017968450107, "grad_norm": 7.831824156077151, "learning_rate": 7.77838761667947e-06, "loss": 17.9389, "step": 18223 }, { "epoch": 0.3331200760414572, "grad_norm": 5.982720017870738, "learning_rate": 7.778141508435641e-06, "loss": 17.2224, "step": 18224 }, { "epoch": 0.33313835523790375, "grad_norm": 7.700784768558035, "learning_rate": 7.777895390454669e-06, "loss": 17.8543, "step": 18225 }, { "epoch": 0.3331566344343503, "grad_norm": 8.116481306875674, "learning_rate": 7.777649262737412e-06, "loss": 18.4105, "step": 18226 }, { "epoch": 0.33317491363079677, "grad_norm": 6.267324299825966, "learning_rate": 7.777403125284737e-06, "loss": 17.1391, "step": 18227 }, { "epoch": 0.3331931928272433, "grad_norm": 6.52838322101029, "learning_rate": 7.777156978097505e-06, "loss": 17.8091, "step": 18228 }, { "epoch": 0.33321147202368984, "grad_norm": 6.199658199303566, "learning_rate": 7.776910821176578e-06, "loss": 17.317, "step": 18229 }, { "epoch": 0.33322975122013637, "grad_norm": 7.757411540484732, "learning_rate": 7.77666465452282e-06, "loss": 18.0858, "step": 18230 }, { "epoch": 0.3332480304165829, "grad_norm": 6.168299341975133, "learning_rate": 7.776418478137095e-06, "loss": 17.5393, "step": 18231 }, { "epoch": 0.3332663096130294, "grad_norm": 6.299672138464206, "learning_rate": 7.776172292020262e-06, "loss": 17.2522, "step": 18232 }, { "epoch": 0.3332845888094759, "grad_norm": 5.580060240216484, "learning_rate": 7.775926096173187e-06, "loss": 17.2081, "step": 18233 }, { "epoch": 0.33330286800592246, "grad_norm": 6.557004094769657, "learning_rate": 7.775679890596731e-06, "loss": 17.5013, "step": 18234 }, { "epoch": 0.333321147202369, "grad_norm": 8.150564436436557, "learning_rate": 7.77543367529176e-06, "loss": 17.8301, "step": 18235 }, { "epoch": 0.33333942639881553, "grad_norm": 6.586964449226526, "learning_rate": 7.775187450259132e-06, "loss": 17.5818, "step": 18236 }, { "epoch": 0.333357705595262, "grad_norm": 5.918227388140869, "learning_rate": 7.774941215499715e-06, "loss": 17.1627, "step": 18237 }, { "epoch": 0.33337598479170855, "grad_norm": 5.814865474293571, "learning_rate": 7.774694971014366e-06, "loss": 17.4967, "step": 18238 }, { "epoch": 0.3333942639881551, "grad_norm": 6.3666921474352005, "learning_rate": 7.774448716803957e-06, "loss": 17.9214, "step": 18239 }, { "epoch": 0.3334125431846016, "grad_norm": 5.805887642846262, "learning_rate": 7.774202452869343e-06, "loss": 17.3668, "step": 18240 }, { "epoch": 0.33343082238104815, "grad_norm": 6.024066042154094, "learning_rate": 7.773956179211392e-06, "loss": 17.3197, "step": 18241 }, { "epoch": 0.33344910157749463, "grad_norm": 6.4865101154431555, "learning_rate": 7.773709895830964e-06, "loss": 17.2924, "step": 18242 }, { "epoch": 0.33346738077394117, "grad_norm": 6.695215422941599, "learning_rate": 7.773463602728923e-06, "loss": 17.669, "step": 18243 }, { "epoch": 0.3334856599703877, "grad_norm": 6.694233011803048, "learning_rate": 7.773217299906134e-06, "loss": 17.7665, "step": 18244 }, { "epoch": 0.33350393916683424, "grad_norm": 5.820866749295414, "learning_rate": 7.772970987363458e-06, "loss": 17.1573, "step": 18245 }, { "epoch": 0.3335222183632808, "grad_norm": 6.240684066895211, "learning_rate": 7.772724665101761e-06, "loss": 17.4504, "step": 18246 }, { "epoch": 0.33354049755972726, "grad_norm": 6.125521233582662, "learning_rate": 7.772478333121904e-06, "loss": 17.2956, "step": 18247 }, { "epoch": 0.3335587767561738, "grad_norm": 7.79943240506785, "learning_rate": 7.77223199142475e-06, "loss": 18.0713, "step": 18248 }, { "epoch": 0.3335770559526203, "grad_norm": 8.366673337653188, "learning_rate": 7.771985640011163e-06, "loss": 18.4496, "step": 18249 }, { "epoch": 0.33359533514906686, "grad_norm": 7.069364827523432, "learning_rate": 7.771739278882009e-06, "loss": 17.8833, "step": 18250 }, { "epoch": 0.3336136143455134, "grad_norm": 5.865046767256241, "learning_rate": 7.771492908038147e-06, "loss": 17.2996, "step": 18251 }, { "epoch": 0.3336318935419599, "grad_norm": 7.32448449067599, "learning_rate": 7.771246527480446e-06, "loss": 17.7859, "step": 18252 }, { "epoch": 0.3336501727384064, "grad_norm": 6.180994553510096, "learning_rate": 7.771000137209763e-06, "loss": 17.5074, "step": 18253 }, { "epoch": 0.33366845193485295, "grad_norm": 6.730369171626773, "learning_rate": 7.770753737226965e-06, "loss": 17.6742, "step": 18254 }, { "epoch": 0.3336867311312995, "grad_norm": 7.494552997378219, "learning_rate": 7.77050732753292e-06, "loss": 17.7578, "step": 18255 }, { "epoch": 0.33370501032774597, "grad_norm": 6.277815491673119, "learning_rate": 7.770260908128481e-06, "loss": 17.4783, "step": 18256 }, { "epoch": 0.3337232895241925, "grad_norm": 6.2061717937115715, "learning_rate": 7.770014479014523e-06, "loss": 17.5916, "step": 18257 }, { "epoch": 0.33374156872063904, "grad_norm": 7.7996121002910055, "learning_rate": 7.769768040191904e-06, "loss": 18.2667, "step": 18258 }, { "epoch": 0.3337598479170856, "grad_norm": 8.257618892263123, "learning_rate": 7.769521591661487e-06, "loss": 18.329, "step": 18259 }, { "epoch": 0.3337781271135321, "grad_norm": 7.268356090039945, "learning_rate": 7.769275133424135e-06, "loss": 18.227, "step": 18260 }, { "epoch": 0.3337964063099786, "grad_norm": 6.236784550460681, "learning_rate": 7.769028665480718e-06, "loss": 17.495, "step": 18261 }, { "epoch": 0.3338146855064251, "grad_norm": 5.826446793854041, "learning_rate": 7.768782187832094e-06, "loss": 17.0138, "step": 18262 }, { "epoch": 0.33383296470287166, "grad_norm": 5.045773038751963, "learning_rate": 7.768535700479128e-06, "loss": 17.0794, "step": 18263 }, { "epoch": 0.3338512438993182, "grad_norm": 6.339256647183979, "learning_rate": 7.768289203422685e-06, "loss": 17.3311, "step": 18264 }, { "epoch": 0.33386952309576473, "grad_norm": 6.1668381429627415, "learning_rate": 7.768042696663629e-06, "loss": 17.2583, "step": 18265 }, { "epoch": 0.3338878022922112, "grad_norm": 6.779129137618555, "learning_rate": 7.767796180202823e-06, "loss": 17.6344, "step": 18266 }, { "epoch": 0.33390608148865775, "grad_norm": 7.8809435520336955, "learning_rate": 7.767549654041132e-06, "loss": 17.9554, "step": 18267 }, { "epoch": 0.3339243606851043, "grad_norm": 7.234332435835093, "learning_rate": 7.767303118179422e-06, "loss": 17.8294, "step": 18268 }, { "epoch": 0.3339426398815508, "grad_norm": 5.744768987872191, "learning_rate": 7.76705657261855e-06, "loss": 17.2128, "step": 18269 }, { "epoch": 0.33396091907799735, "grad_norm": 7.181589881128322, "learning_rate": 7.766810017359387e-06, "loss": 17.7517, "step": 18270 }, { "epoch": 0.33397919827444383, "grad_norm": 7.6324903882375645, "learning_rate": 7.766563452402796e-06, "loss": 18.0289, "step": 18271 }, { "epoch": 0.33399747747089037, "grad_norm": 7.045548758291174, "learning_rate": 7.766316877749641e-06, "loss": 17.5929, "step": 18272 }, { "epoch": 0.3340157566673369, "grad_norm": 5.48986111322997, "learning_rate": 7.766070293400783e-06, "loss": 17.0748, "step": 18273 }, { "epoch": 0.33403403586378344, "grad_norm": 6.950841627839012, "learning_rate": 7.765823699357089e-06, "loss": 17.7548, "step": 18274 }, { "epoch": 0.33405231506023, "grad_norm": 7.359007079523373, "learning_rate": 7.765577095619423e-06, "loss": 17.6903, "step": 18275 }, { "epoch": 0.33407059425667646, "grad_norm": 6.8479912684776725, "learning_rate": 7.765330482188649e-06, "loss": 17.1306, "step": 18276 }, { "epoch": 0.334088873453123, "grad_norm": 7.270761509348165, "learning_rate": 7.765083859065631e-06, "loss": 17.8807, "step": 18277 }, { "epoch": 0.3341071526495695, "grad_norm": 6.29190187688376, "learning_rate": 7.764837226251237e-06, "loss": 17.6153, "step": 18278 }, { "epoch": 0.33412543184601606, "grad_norm": 6.576013848161763, "learning_rate": 7.764590583746328e-06, "loss": 17.4124, "step": 18279 }, { "epoch": 0.3341437110424626, "grad_norm": 6.887204970856277, "learning_rate": 7.764343931551765e-06, "loss": 17.745, "step": 18280 }, { "epoch": 0.3341619902389091, "grad_norm": 7.233508854667866, "learning_rate": 7.76409726966842e-06, "loss": 18.0759, "step": 18281 }, { "epoch": 0.3341802694353556, "grad_norm": 7.782221930944987, "learning_rate": 7.763850598097154e-06, "loss": 18.2327, "step": 18282 }, { "epoch": 0.33419854863180215, "grad_norm": 5.774946535351223, "learning_rate": 7.76360391683883e-06, "loss": 17.1124, "step": 18283 }, { "epoch": 0.3342168278282487, "grad_norm": 7.142259606946519, "learning_rate": 7.763357225894314e-06, "loss": 17.9635, "step": 18284 }, { "epoch": 0.3342351070246952, "grad_norm": 6.552254980874395, "learning_rate": 7.763110525264471e-06, "loss": 17.6825, "step": 18285 }, { "epoch": 0.3342533862211417, "grad_norm": 7.166106033103709, "learning_rate": 7.762863814950165e-06, "loss": 17.816, "step": 18286 }, { "epoch": 0.33427166541758824, "grad_norm": 6.908900588446828, "learning_rate": 7.762617094952262e-06, "loss": 18.2036, "step": 18287 }, { "epoch": 0.3342899446140348, "grad_norm": 7.4750743296947375, "learning_rate": 7.762370365271625e-06, "loss": 18.0611, "step": 18288 }, { "epoch": 0.3343082238104813, "grad_norm": 6.420174060591953, "learning_rate": 7.76212362590912e-06, "loss": 17.6022, "step": 18289 }, { "epoch": 0.3343265030069278, "grad_norm": 6.725124424115284, "learning_rate": 7.761876876865612e-06, "loss": 17.3032, "step": 18290 }, { "epoch": 0.3343447822033743, "grad_norm": 6.210333346303373, "learning_rate": 7.761630118141966e-06, "loss": 17.2584, "step": 18291 }, { "epoch": 0.33436306139982086, "grad_norm": 7.46718203655446, "learning_rate": 7.761383349739045e-06, "loss": 18.3899, "step": 18292 }, { "epoch": 0.3343813405962674, "grad_norm": 5.809202179301146, "learning_rate": 7.761136571657714e-06, "loss": 17.3044, "step": 18293 }, { "epoch": 0.33439961979271393, "grad_norm": 7.969665938088734, "learning_rate": 7.760889783898839e-06, "loss": 18.0716, "step": 18294 }, { "epoch": 0.3344178989891604, "grad_norm": 7.580848556915434, "learning_rate": 7.760642986463284e-06, "loss": 18.0002, "step": 18295 }, { "epoch": 0.33443617818560695, "grad_norm": 5.325450503699952, "learning_rate": 7.760396179351919e-06, "loss": 17.0275, "step": 18296 }, { "epoch": 0.3344544573820535, "grad_norm": 7.614931020495053, "learning_rate": 7.760149362565602e-06, "loss": 18.0945, "step": 18297 }, { "epoch": 0.3344727365785, "grad_norm": 6.624587523393241, "learning_rate": 7.7599025361052e-06, "loss": 17.1637, "step": 18298 }, { "epoch": 0.33449101577494655, "grad_norm": 6.067403262655608, "learning_rate": 7.759655699971581e-06, "loss": 17.3723, "step": 18299 }, { "epoch": 0.33450929497139303, "grad_norm": 7.451357832709255, "learning_rate": 7.759408854165608e-06, "loss": 17.8072, "step": 18300 }, { "epoch": 0.33452757416783957, "grad_norm": 5.248124535174485, "learning_rate": 7.759161998688145e-06, "loss": 16.988, "step": 18301 }, { "epoch": 0.3345458533642861, "grad_norm": 5.894392508469301, "learning_rate": 7.758915133540059e-06, "loss": 17.2724, "step": 18302 }, { "epoch": 0.33456413256073264, "grad_norm": 8.490995476710541, "learning_rate": 7.758668258722217e-06, "loss": 17.5968, "step": 18303 }, { "epoch": 0.3345824117571792, "grad_norm": 6.3621774839835705, "learning_rate": 7.758421374235481e-06, "loss": 17.3985, "step": 18304 }, { "epoch": 0.33460069095362566, "grad_norm": 6.348004245125481, "learning_rate": 7.758174480080717e-06, "loss": 17.4257, "step": 18305 }, { "epoch": 0.3346189701500722, "grad_norm": 6.258615994091, "learning_rate": 7.75792757625879e-06, "loss": 17.6457, "step": 18306 }, { "epoch": 0.33463724934651873, "grad_norm": 6.986781705578218, "learning_rate": 7.757680662770568e-06, "loss": 17.6274, "step": 18307 }, { "epoch": 0.33465552854296526, "grad_norm": 6.621352092591684, "learning_rate": 7.757433739616913e-06, "loss": 17.8456, "step": 18308 }, { "epoch": 0.3346738077394118, "grad_norm": 5.42694561819176, "learning_rate": 7.757186806798693e-06, "loss": 17.1311, "step": 18309 }, { "epoch": 0.3346920869358583, "grad_norm": 6.145164943286487, "learning_rate": 7.756939864316773e-06, "loss": 17.3195, "step": 18310 }, { "epoch": 0.3347103661323048, "grad_norm": 6.99898461330753, "learning_rate": 7.756692912172017e-06, "loss": 17.8293, "step": 18311 }, { "epoch": 0.33472864532875135, "grad_norm": 7.338612922482191, "learning_rate": 7.756445950365292e-06, "loss": 17.5407, "step": 18312 }, { "epoch": 0.3347469245251979, "grad_norm": 6.485677018272688, "learning_rate": 7.756198978897463e-06, "loss": 17.7522, "step": 18313 }, { "epoch": 0.3347652037216444, "grad_norm": 7.882693990131259, "learning_rate": 7.755951997769395e-06, "loss": 17.9094, "step": 18314 }, { "epoch": 0.3347834829180909, "grad_norm": 6.346007494365597, "learning_rate": 7.755705006981955e-06, "loss": 17.3265, "step": 18315 }, { "epoch": 0.33480176211453744, "grad_norm": 6.206414249888963, "learning_rate": 7.75545800653601e-06, "loss": 17.2567, "step": 18316 }, { "epoch": 0.334820041310984, "grad_norm": 6.55249888096095, "learning_rate": 7.755210996432421e-06, "loss": 17.4971, "step": 18317 }, { "epoch": 0.3348383205074305, "grad_norm": 6.964432634711099, "learning_rate": 7.754963976672056e-06, "loss": 17.7342, "step": 18318 }, { "epoch": 0.33485659970387704, "grad_norm": 7.916931176316435, "learning_rate": 7.754716947255784e-06, "loss": 17.8617, "step": 18319 }, { "epoch": 0.3348748789003235, "grad_norm": 6.461639288916398, "learning_rate": 7.754469908184467e-06, "loss": 17.347, "step": 18320 }, { "epoch": 0.33489315809677006, "grad_norm": 5.057605319358128, "learning_rate": 7.754222859458973e-06, "loss": 16.8883, "step": 18321 }, { "epoch": 0.3349114372932166, "grad_norm": 6.832876065541013, "learning_rate": 7.753975801080165e-06, "loss": 17.5447, "step": 18322 }, { "epoch": 0.33492971648966313, "grad_norm": 6.864591411239356, "learning_rate": 7.753728733048911e-06, "loss": 17.6954, "step": 18323 }, { "epoch": 0.3349479956861096, "grad_norm": 6.001523010916419, "learning_rate": 7.753481655366077e-06, "loss": 17.3145, "step": 18324 }, { "epoch": 0.33496627488255615, "grad_norm": 5.667306709259325, "learning_rate": 7.75323456803253e-06, "loss": 17.0702, "step": 18325 }, { "epoch": 0.3349845540790027, "grad_norm": 6.639306807494685, "learning_rate": 7.752987471049133e-06, "loss": 17.4848, "step": 18326 }, { "epoch": 0.3350028332754492, "grad_norm": 6.2005682970654865, "learning_rate": 7.752740364416756e-06, "loss": 17.4231, "step": 18327 }, { "epoch": 0.33502111247189575, "grad_norm": 6.852004423139621, "learning_rate": 7.752493248136262e-06, "loss": 17.8458, "step": 18328 }, { "epoch": 0.33503939166834223, "grad_norm": 6.242489731981976, "learning_rate": 7.752246122208515e-06, "loss": 17.3899, "step": 18329 }, { "epoch": 0.33505767086478877, "grad_norm": 6.761276698732926, "learning_rate": 7.751998986634388e-06, "loss": 17.3753, "step": 18330 }, { "epoch": 0.3350759500612353, "grad_norm": 8.4697101881472, "learning_rate": 7.751751841414742e-06, "loss": 18.2433, "step": 18331 }, { "epoch": 0.33509422925768184, "grad_norm": 5.98340590415751, "learning_rate": 7.751504686550444e-06, "loss": 17.3087, "step": 18332 }, { "epoch": 0.3351125084541284, "grad_norm": 4.63306568016994, "learning_rate": 7.75125752204236e-06, "loss": 16.8643, "step": 18333 }, { "epoch": 0.33513078765057486, "grad_norm": 6.874276635523743, "learning_rate": 7.751010347891361e-06, "loss": 17.6956, "step": 18334 }, { "epoch": 0.3351490668470214, "grad_norm": 7.056705825769277, "learning_rate": 7.750763164098308e-06, "loss": 18.1054, "step": 18335 }, { "epoch": 0.33516734604346793, "grad_norm": 6.022894070645962, "learning_rate": 7.750515970664066e-06, "loss": 17.2611, "step": 18336 }, { "epoch": 0.33518562523991446, "grad_norm": 6.4601713256358595, "learning_rate": 7.750268767589507e-06, "loss": 17.4091, "step": 18337 }, { "epoch": 0.335203904436361, "grad_norm": 7.631234410012835, "learning_rate": 7.750021554875493e-06, "loss": 18.0535, "step": 18338 }, { "epoch": 0.3352221836328075, "grad_norm": 6.5660887452535865, "learning_rate": 7.749774332522894e-06, "loss": 17.5306, "step": 18339 }, { "epoch": 0.335240462829254, "grad_norm": 6.38797809805494, "learning_rate": 7.749527100532572e-06, "loss": 17.5954, "step": 18340 }, { "epoch": 0.33525874202570055, "grad_norm": 6.6464765680954505, "learning_rate": 7.749279858905398e-06, "loss": 17.9632, "step": 18341 }, { "epoch": 0.3352770212221471, "grad_norm": 5.948301317242268, "learning_rate": 7.749032607642237e-06, "loss": 17.1922, "step": 18342 }, { "epoch": 0.3352953004185936, "grad_norm": 5.775275108449763, "learning_rate": 7.748785346743955e-06, "loss": 17.3549, "step": 18343 }, { "epoch": 0.3353135796150401, "grad_norm": 5.930169817187332, "learning_rate": 7.748538076211418e-06, "loss": 17.362, "step": 18344 }, { "epoch": 0.33533185881148664, "grad_norm": 6.230980678229091, "learning_rate": 7.748290796045493e-06, "loss": 17.4745, "step": 18345 }, { "epoch": 0.3353501380079332, "grad_norm": 6.814774819011009, "learning_rate": 7.74804350624705e-06, "loss": 17.7425, "step": 18346 }, { "epoch": 0.3353684172043797, "grad_norm": 7.52974247559615, "learning_rate": 7.74779620681695e-06, "loss": 17.9699, "step": 18347 }, { "epoch": 0.33538669640082625, "grad_norm": 6.217013645338138, "learning_rate": 7.747548897756063e-06, "loss": 17.175, "step": 18348 }, { "epoch": 0.3354049755972727, "grad_norm": 6.00741862194668, "learning_rate": 7.747301579065256e-06, "loss": 17.1926, "step": 18349 }, { "epoch": 0.33542325479371926, "grad_norm": 6.8412301633342105, "learning_rate": 7.747054250745396e-06, "loss": 17.6055, "step": 18350 }, { "epoch": 0.3354415339901658, "grad_norm": 6.638726934444085, "learning_rate": 7.746806912797349e-06, "loss": 17.6016, "step": 18351 }, { "epoch": 0.33545981318661233, "grad_norm": 7.583313276990292, "learning_rate": 7.746559565221983e-06, "loss": 18.0188, "step": 18352 }, { "epoch": 0.33547809238305887, "grad_norm": 7.2380370876110645, "learning_rate": 7.746312208020164e-06, "loss": 18.0414, "step": 18353 }, { "epoch": 0.33549637157950535, "grad_norm": 7.097301550884373, "learning_rate": 7.746064841192757e-06, "loss": 17.6619, "step": 18354 }, { "epoch": 0.3355146507759519, "grad_norm": 6.277958463243101, "learning_rate": 7.745817464740633e-06, "loss": 17.5832, "step": 18355 }, { "epoch": 0.3355329299723984, "grad_norm": 6.045685301953929, "learning_rate": 7.745570078664655e-06, "loss": 17.2695, "step": 18356 }, { "epoch": 0.33555120916884495, "grad_norm": 7.749337621853986, "learning_rate": 7.745322682965693e-06, "loss": 17.9238, "step": 18357 }, { "epoch": 0.33556948836529144, "grad_norm": 7.15648722064878, "learning_rate": 7.745075277644615e-06, "loss": 18.0479, "step": 18358 }, { "epoch": 0.33558776756173797, "grad_norm": 6.481833282438767, "learning_rate": 7.744827862702284e-06, "loss": 17.5936, "step": 18359 }, { "epoch": 0.3356060467581845, "grad_norm": 7.27669269267047, "learning_rate": 7.744580438139571e-06, "loss": 17.9004, "step": 18360 }, { "epoch": 0.33562432595463104, "grad_norm": 6.198573375094083, "learning_rate": 7.744333003957341e-06, "loss": 17.412, "step": 18361 }, { "epoch": 0.3356426051510776, "grad_norm": 6.3126170267845465, "learning_rate": 7.744085560156462e-06, "loss": 17.565, "step": 18362 }, { "epoch": 0.33566088434752406, "grad_norm": 6.722386020348753, "learning_rate": 7.743838106737802e-06, "loss": 17.2823, "step": 18363 }, { "epoch": 0.3356791635439706, "grad_norm": 8.532039182448148, "learning_rate": 7.743590643702227e-06, "loss": 18.4358, "step": 18364 }, { "epoch": 0.33569744274041713, "grad_norm": 7.019269928101385, "learning_rate": 7.743343171050604e-06, "loss": 17.7279, "step": 18365 }, { "epoch": 0.33571572193686366, "grad_norm": 6.2363754879825795, "learning_rate": 7.743095688783803e-06, "loss": 17.4868, "step": 18366 }, { "epoch": 0.3357340011333102, "grad_norm": 6.547608551667183, "learning_rate": 7.742848196902688e-06, "loss": 17.6869, "step": 18367 }, { "epoch": 0.3357522803297567, "grad_norm": 6.524650132021672, "learning_rate": 7.742600695408128e-06, "loss": 17.6394, "step": 18368 }, { "epoch": 0.3357705595262032, "grad_norm": 7.580916121783052, "learning_rate": 7.742353184300992e-06, "loss": 17.504, "step": 18369 }, { "epoch": 0.33578883872264975, "grad_norm": 7.134162781073809, "learning_rate": 7.742105663582145e-06, "loss": 17.9606, "step": 18370 }, { "epoch": 0.3358071179190963, "grad_norm": 6.742971936651327, "learning_rate": 7.741858133252456e-06, "loss": 17.4696, "step": 18371 }, { "epoch": 0.3358253971155428, "grad_norm": 5.756909069664118, "learning_rate": 7.741610593312794e-06, "loss": 17.2435, "step": 18372 }, { "epoch": 0.3358436763119893, "grad_norm": 6.655582882159083, "learning_rate": 7.741363043764023e-06, "loss": 17.3407, "step": 18373 }, { "epoch": 0.33586195550843584, "grad_norm": 5.649762551970552, "learning_rate": 7.741115484607011e-06, "loss": 17.1598, "step": 18374 }, { "epoch": 0.3358802347048824, "grad_norm": 5.345426745039483, "learning_rate": 7.74086791584263e-06, "loss": 17.0716, "step": 18375 }, { "epoch": 0.3358985139013289, "grad_norm": 6.107630936871481, "learning_rate": 7.740620337471743e-06, "loss": 17.4788, "step": 18376 }, { "epoch": 0.33591679309777545, "grad_norm": 6.792138438895729, "learning_rate": 7.74037274949522e-06, "loss": 17.4798, "step": 18377 }, { "epoch": 0.3359350722942219, "grad_norm": 5.853056285933311, "learning_rate": 7.740125151913927e-06, "loss": 17.2303, "step": 18378 }, { "epoch": 0.33595335149066846, "grad_norm": 6.464523229272147, "learning_rate": 7.739877544728734e-06, "loss": 17.5476, "step": 18379 }, { "epoch": 0.335971630687115, "grad_norm": 8.422027448603725, "learning_rate": 7.739629927940508e-06, "loss": 18.2603, "step": 18380 }, { "epoch": 0.33598990988356153, "grad_norm": 6.288018775709371, "learning_rate": 7.739382301550117e-06, "loss": 17.6214, "step": 18381 }, { "epoch": 0.33600818908000807, "grad_norm": 6.475994648492222, "learning_rate": 7.73913466555843e-06, "loss": 17.4556, "step": 18382 }, { "epoch": 0.33602646827645455, "grad_norm": 5.701573680999045, "learning_rate": 7.738887019966312e-06, "loss": 17.106, "step": 18383 }, { "epoch": 0.3360447474729011, "grad_norm": 7.0780042943270125, "learning_rate": 7.738639364774633e-06, "loss": 17.6859, "step": 18384 }, { "epoch": 0.3360630266693476, "grad_norm": 7.001881184847415, "learning_rate": 7.73839169998426e-06, "loss": 18.0078, "step": 18385 }, { "epoch": 0.33608130586579416, "grad_norm": 7.605615146116568, "learning_rate": 7.738144025596063e-06, "loss": 18.2703, "step": 18386 }, { "epoch": 0.3360995850622407, "grad_norm": 6.939675288723311, "learning_rate": 7.737896341610908e-06, "loss": 17.482, "step": 18387 }, { "epoch": 0.33611786425868717, "grad_norm": 6.544616756770822, "learning_rate": 7.737648648029664e-06, "loss": 17.7247, "step": 18388 }, { "epoch": 0.3361361434551337, "grad_norm": 5.560292907012687, "learning_rate": 7.737400944853201e-06, "loss": 17.3386, "step": 18389 }, { "epoch": 0.33615442265158024, "grad_norm": 6.072849495003731, "learning_rate": 7.737153232082383e-06, "loss": 17.3119, "step": 18390 }, { "epoch": 0.3361727018480268, "grad_norm": 7.224700545550032, "learning_rate": 7.73690550971808e-06, "loss": 17.9829, "step": 18391 }, { "epoch": 0.33619098104447326, "grad_norm": 7.312227418371407, "learning_rate": 7.736657777761164e-06, "loss": 17.7287, "step": 18392 }, { "epoch": 0.3362092602409198, "grad_norm": 8.927086043083467, "learning_rate": 7.736410036212497e-06, "loss": 18.3927, "step": 18393 }, { "epoch": 0.33622753943736633, "grad_norm": 6.14345139295062, "learning_rate": 7.73616228507295e-06, "loss": 17.3107, "step": 18394 }, { "epoch": 0.33624581863381287, "grad_norm": 8.08745349602045, "learning_rate": 7.735914524343393e-06, "loss": 18.4474, "step": 18395 }, { "epoch": 0.3362640978302594, "grad_norm": 7.518355698459249, "learning_rate": 7.735666754024692e-06, "loss": 18.0125, "step": 18396 }, { "epoch": 0.3362823770267059, "grad_norm": 7.670188487304936, "learning_rate": 7.735418974117716e-06, "loss": 18.2755, "step": 18397 }, { "epoch": 0.3363006562231524, "grad_norm": 7.363685793543698, "learning_rate": 7.735171184623336e-06, "loss": 17.5832, "step": 18398 }, { "epoch": 0.33631893541959895, "grad_norm": 7.797323413026644, "learning_rate": 7.734923385542417e-06, "loss": 18.2299, "step": 18399 }, { "epoch": 0.3363372146160455, "grad_norm": 6.175064421290564, "learning_rate": 7.734675576875828e-06, "loss": 17.5104, "step": 18400 }, { "epoch": 0.336355493812492, "grad_norm": 6.925918699168115, "learning_rate": 7.73442775862444e-06, "loss": 17.8916, "step": 18401 }, { "epoch": 0.3363737730089385, "grad_norm": 6.313385351587444, "learning_rate": 7.734179930789119e-06, "loss": 17.4044, "step": 18402 }, { "epoch": 0.33639205220538504, "grad_norm": 6.600660588623588, "learning_rate": 7.733932093370735e-06, "loss": 17.4565, "step": 18403 }, { "epoch": 0.3364103314018316, "grad_norm": 6.081680885442603, "learning_rate": 7.733684246370156e-06, "loss": 17.4435, "step": 18404 }, { "epoch": 0.3364286105982781, "grad_norm": 6.066761271916142, "learning_rate": 7.733436389788252e-06, "loss": 17.3945, "step": 18405 }, { "epoch": 0.33644688979472465, "grad_norm": 5.79555519497916, "learning_rate": 7.73318852362589e-06, "loss": 17.3575, "step": 18406 }, { "epoch": 0.3364651689911711, "grad_norm": 7.031841822332528, "learning_rate": 7.732940647883939e-06, "loss": 17.3679, "step": 18407 }, { "epoch": 0.33648344818761766, "grad_norm": 5.652500007778597, "learning_rate": 7.732692762563267e-06, "loss": 17.395, "step": 18408 }, { "epoch": 0.3365017273840642, "grad_norm": 7.356023526358955, "learning_rate": 7.732444867664746e-06, "loss": 18.0134, "step": 18409 }, { "epoch": 0.33652000658051073, "grad_norm": 6.5753188549896775, "learning_rate": 7.732196963189243e-06, "loss": 17.7415, "step": 18410 }, { "epoch": 0.33653828577695727, "grad_norm": 6.340180504691334, "learning_rate": 7.731949049137627e-06, "loss": 17.2667, "step": 18411 }, { "epoch": 0.33655656497340375, "grad_norm": 6.867459569486875, "learning_rate": 7.731701125510764e-06, "loss": 17.7586, "step": 18412 }, { "epoch": 0.3365748441698503, "grad_norm": 9.345006651836984, "learning_rate": 7.731453192309529e-06, "loss": 18.6728, "step": 18413 }, { "epoch": 0.3365931233662968, "grad_norm": 6.909630227126561, "learning_rate": 7.731205249534785e-06, "loss": 17.6986, "step": 18414 }, { "epoch": 0.33661140256274336, "grad_norm": 7.7838930350249935, "learning_rate": 7.730957297187403e-06, "loss": 17.6255, "step": 18415 }, { "epoch": 0.3366296817591899, "grad_norm": 6.265011349717121, "learning_rate": 7.730709335268256e-06, "loss": 17.4874, "step": 18416 }, { "epoch": 0.33664796095563637, "grad_norm": 8.02861059803375, "learning_rate": 7.730461363778206e-06, "loss": 18.3071, "step": 18417 }, { "epoch": 0.3366662401520829, "grad_norm": 5.596711905591294, "learning_rate": 7.730213382718129e-06, "loss": 17.1153, "step": 18418 }, { "epoch": 0.33668451934852944, "grad_norm": 7.972988658366918, "learning_rate": 7.72996539208889e-06, "loss": 18.1299, "step": 18419 }, { "epoch": 0.336702798544976, "grad_norm": 8.120943580975988, "learning_rate": 7.729717391891358e-06, "loss": 18.3888, "step": 18420 }, { "epoch": 0.3367210777414225, "grad_norm": 6.731044068945436, "learning_rate": 7.729469382126405e-06, "loss": 17.5747, "step": 18421 }, { "epoch": 0.336739356937869, "grad_norm": 7.289569065858472, "learning_rate": 7.729221362794897e-06, "loss": 17.9513, "step": 18422 }, { "epoch": 0.33675763613431553, "grad_norm": 5.743911337736639, "learning_rate": 7.728973333897707e-06, "loss": 17.2202, "step": 18423 }, { "epoch": 0.33677591533076207, "grad_norm": 6.662068868190871, "learning_rate": 7.728725295435701e-06, "loss": 17.6026, "step": 18424 }, { "epoch": 0.3367941945272086, "grad_norm": 9.093729319320211, "learning_rate": 7.72847724740975e-06, "loss": 18.9263, "step": 18425 }, { "epoch": 0.3368124737236551, "grad_norm": 6.455217427397297, "learning_rate": 7.728229189820721e-06, "loss": 17.7361, "step": 18426 }, { "epoch": 0.3368307529201016, "grad_norm": 7.146877314145098, "learning_rate": 7.72798112266949e-06, "loss": 17.7652, "step": 18427 }, { "epoch": 0.33684903211654815, "grad_norm": 7.404027428581642, "learning_rate": 7.727733045956919e-06, "loss": 17.6531, "step": 18428 }, { "epoch": 0.3368673113129947, "grad_norm": 6.469316020147925, "learning_rate": 7.72748495968388e-06, "loss": 17.7893, "step": 18429 }, { "epoch": 0.3368855905094412, "grad_norm": 7.740519309697861, "learning_rate": 7.727236863851243e-06, "loss": 17.9784, "step": 18430 }, { "epoch": 0.3369038697058877, "grad_norm": 7.096828766894045, "learning_rate": 7.726988758459877e-06, "loss": 17.7158, "step": 18431 }, { "epoch": 0.33692214890233424, "grad_norm": 6.821265096886873, "learning_rate": 7.726740643510654e-06, "loss": 17.6382, "step": 18432 }, { "epoch": 0.3369404280987808, "grad_norm": 7.026946958908574, "learning_rate": 7.72649251900444e-06, "loss": 18.1321, "step": 18433 }, { "epoch": 0.3369587072952273, "grad_norm": 6.347502405851839, "learning_rate": 7.726244384942108e-06, "loss": 17.4784, "step": 18434 }, { "epoch": 0.33697698649167385, "grad_norm": 5.277570150694714, "learning_rate": 7.725996241324524e-06, "loss": 17.0442, "step": 18435 }, { "epoch": 0.3369952656881203, "grad_norm": 5.521964833824245, "learning_rate": 7.725748088152561e-06, "loss": 17.1314, "step": 18436 }, { "epoch": 0.33701354488456686, "grad_norm": 7.197662323340944, "learning_rate": 7.725499925427086e-06, "loss": 17.6402, "step": 18437 }, { "epoch": 0.3370318240810134, "grad_norm": 6.006474924625444, "learning_rate": 7.725251753148972e-06, "loss": 17.4828, "step": 18438 }, { "epoch": 0.33705010327745993, "grad_norm": 5.059700720043585, "learning_rate": 7.725003571319086e-06, "loss": 16.9115, "step": 18439 }, { "epoch": 0.33706838247390647, "grad_norm": 6.864063054870559, "learning_rate": 7.7247553799383e-06, "loss": 17.577, "step": 18440 }, { "epoch": 0.33708666167035295, "grad_norm": 6.52387554693364, "learning_rate": 7.724507179007484e-06, "loss": 17.5513, "step": 18441 }, { "epoch": 0.3371049408667995, "grad_norm": 5.43570866174775, "learning_rate": 7.724258968527503e-06, "loss": 17.2563, "step": 18442 }, { "epoch": 0.337123220063246, "grad_norm": 7.3973909691611395, "learning_rate": 7.724010748499232e-06, "loss": 17.9174, "step": 18443 }, { "epoch": 0.33714149925969256, "grad_norm": 6.778630546710854, "learning_rate": 7.72376251892354e-06, "loss": 17.6066, "step": 18444 }, { "epoch": 0.3371597784561391, "grad_norm": 5.961436839492804, "learning_rate": 7.723514279801298e-06, "loss": 17.3852, "step": 18445 }, { "epoch": 0.33717805765258557, "grad_norm": 7.062129788464218, "learning_rate": 7.723266031133373e-06, "loss": 17.9826, "step": 18446 }, { "epoch": 0.3371963368490321, "grad_norm": 7.432303147374123, "learning_rate": 7.72301777292064e-06, "loss": 17.7383, "step": 18447 }, { "epoch": 0.33721461604547864, "grad_norm": 7.742647669234327, "learning_rate": 7.722769505163963e-06, "loss": 18.1091, "step": 18448 }, { "epoch": 0.3372328952419252, "grad_norm": 7.893233484369064, "learning_rate": 7.722521227864216e-06, "loss": 18.2455, "step": 18449 }, { "epoch": 0.3372511744383717, "grad_norm": 7.142750209709399, "learning_rate": 7.722272941022268e-06, "loss": 17.8689, "step": 18450 }, { "epoch": 0.3372694536348182, "grad_norm": 7.571102108735381, "learning_rate": 7.72202464463899e-06, "loss": 18.0523, "step": 18451 }, { "epoch": 0.33728773283126473, "grad_norm": 7.237628522781843, "learning_rate": 7.721776338715252e-06, "loss": 17.7742, "step": 18452 }, { "epoch": 0.33730601202771127, "grad_norm": 8.014417650877423, "learning_rate": 7.721528023251924e-06, "loss": 18.1643, "step": 18453 }, { "epoch": 0.3373242912241578, "grad_norm": 5.603909557078515, "learning_rate": 7.721279698249878e-06, "loss": 17.1089, "step": 18454 }, { "epoch": 0.33734257042060434, "grad_norm": 8.166787237695967, "learning_rate": 7.72103136370998e-06, "loss": 18.3125, "step": 18455 }, { "epoch": 0.3373608496170508, "grad_norm": 7.54873919290871, "learning_rate": 7.720783019633103e-06, "loss": 17.8197, "step": 18456 }, { "epoch": 0.33737912881349735, "grad_norm": 5.9202994619846, "learning_rate": 7.720534666020119e-06, "loss": 17.3548, "step": 18457 }, { "epoch": 0.3373974080099439, "grad_norm": 6.260292847986377, "learning_rate": 7.720286302871899e-06, "loss": 17.5368, "step": 18458 }, { "epoch": 0.3374156872063904, "grad_norm": 5.300475724027984, "learning_rate": 7.720037930189308e-06, "loss": 17.0986, "step": 18459 }, { "epoch": 0.3374339664028369, "grad_norm": 8.137722655610194, "learning_rate": 7.719789547973222e-06, "loss": 18.1161, "step": 18460 }, { "epoch": 0.33745224559928344, "grad_norm": 6.534889342544086, "learning_rate": 7.719541156224509e-06, "loss": 17.3363, "step": 18461 }, { "epoch": 0.33747052479573, "grad_norm": 6.652638240476339, "learning_rate": 7.71929275494404e-06, "loss": 17.3188, "step": 18462 }, { "epoch": 0.3374888039921765, "grad_norm": 6.130865054693788, "learning_rate": 7.719044344132687e-06, "loss": 17.2704, "step": 18463 }, { "epoch": 0.33750708318862305, "grad_norm": 6.9481372715060985, "learning_rate": 7.718795923791318e-06, "loss": 17.679, "step": 18464 }, { "epoch": 0.3375253623850695, "grad_norm": 6.577487798955367, "learning_rate": 7.718547493920808e-06, "loss": 17.4429, "step": 18465 }, { "epoch": 0.33754364158151606, "grad_norm": 6.712775286732302, "learning_rate": 7.718299054522023e-06, "loss": 17.616, "step": 18466 }, { "epoch": 0.3375619207779626, "grad_norm": 6.309615343063586, "learning_rate": 7.718050605595834e-06, "loss": 17.5656, "step": 18467 }, { "epoch": 0.33758019997440913, "grad_norm": 7.976188969781821, "learning_rate": 7.717802147143116e-06, "loss": 17.9397, "step": 18468 }, { "epoch": 0.33759847917085567, "grad_norm": 6.056843727085684, "learning_rate": 7.717553679164736e-06, "loss": 17.6374, "step": 18469 }, { "epoch": 0.33761675836730215, "grad_norm": 5.79688689723586, "learning_rate": 7.717305201661567e-06, "loss": 17.2079, "step": 18470 }, { "epoch": 0.3376350375637487, "grad_norm": 8.007233412783789, "learning_rate": 7.717056714634478e-06, "loss": 18.2403, "step": 18471 }, { "epoch": 0.3376533167601952, "grad_norm": 8.340269402764084, "learning_rate": 7.716808218084344e-06, "loss": 18.3294, "step": 18472 }, { "epoch": 0.33767159595664176, "grad_norm": 5.789205177314982, "learning_rate": 7.716559712012029e-06, "loss": 17.2842, "step": 18473 }, { "epoch": 0.3376898751530883, "grad_norm": 6.961346009129132, "learning_rate": 7.716311196418409e-06, "loss": 18.0157, "step": 18474 }, { "epoch": 0.3377081543495348, "grad_norm": 6.816075453202713, "learning_rate": 7.716062671304356e-06, "loss": 17.5105, "step": 18475 }, { "epoch": 0.3377264335459813, "grad_norm": 7.1519747347555525, "learning_rate": 7.715814136670738e-06, "loss": 17.6979, "step": 18476 }, { "epoch": 0.33774471274242784, "grad_norm": 7.397342958436656, "learning_rate": 7.715565592518426e-06, "loss": 18.2724, "step": 18477 }, { "epoch": 0.3377629919388744, "grad_norm": 7.2484185914721415, "learning_rate": 7.715317038848294e-06, "loss": 18.0584, "step": 18478 }, { "epoch": 0.3377812711353209, "grad_norm": 6.753753332473047, "learning_rate": 7.71506847566121e-06, "loss": 17.6873, "step": 18479 }, { "epoch": 0.3377995503317674, "grad_norm": 5.910713944546503, "learning_rate": 7.714819902958047e-06, "loss": 17.2845, "step": 18480 }, { "epoch": 0.33781782952821393, "grad_norm": 6.040055410360147, "learning_rate": 7.714571320739674e-06, "loss": 17.6741, "step": 18481 }, { "epoch": 0.33783610872466047, "grad_norm": 6.16224240871509, "learning_rate": 7.714322729006968e-06, "loss": 17.4705, "step": 18482 }, { "epoch": 0.337854387921107, "grad_norm": 6.786275507486063, "learning_rate": 7.714074127760793e-06, "loss": 17.6599, "step": 18483 }, { "epoch": 0.33787266711755354, "grad_norm": 5.052748271895253, "learning_rate": 7.713825517002025e-06, "loss": 16.891, "step": 18484 }, { "epoch": 0.337890946314, "grad_norm": 6.548327809552189, "learning_rate": 7.713576896731534e-06, "loss": 17.8533, "step": 18485 }, { "epoch": 0.33790922551044655, "grad_norm": 6.809651591047302, "learning_rate": 7.713328266950192e-06, "loss": 17.5008, "step": 18486 }, { "epoch": 0.3379275047068931, "grad_norm": 6.811358222407411, "learning_rate": 7.71307962765887e-06, "loss": 17.4791, "step": 18487 }, { "epoch": 0.3379457839033396, "grad_norm": 6.732612926720186, "learning_rate": 7.712830978858437e-06, "loss": 17.6251, "step": 18488 }, { "epoch": 0.33796406309978616, "grad_norm": 6.0661694502438674, "learning_rate": 7.71258232054977e-06, "loss": 17.433, "step": 18489 }, { "epoch": 0.33798234229623264, "grad_norm": 7.741720293416946, "learning_rate": 7.712333652733736e-06, "loss": 17.7191, "step": 18490 }, { "epoch": 0.3380006214926792, "grad_norm": 5.94148528406997, "learning_rate": 7.712084975411207e-06, "loss": 17.4314, "step": 18491 }, { "epoch": 0.3380189006891257, "grad_norm": 5.503236881276483, "learning_rate": 7.711836288583056e-06, "loss": 17.3177, "step": 18492 }, { "epoch": 0.33803717988557225, "grad_norm": 6.227555450615774, "learning_rate": 7.711587592250157e-06, "loss": 17.5322, "step": 18493 }, { "epoch": 0.3380554590820187, "grad_norm": 6.5307639282098044, "learning_rate": 7.711338886413375e-06, "loss": 17.6392, "step": 18494 }, { "epoch": 0.33807373827846526, "grad_norm": 8.25031259660908, "learning_rate": 7.711090171073585e-06, "loss": 18.2677, "step": 18495 }, { "epoch": 0.3380920174749118, "grad_norm": 7.919697683464512, "learning_rate": 7.710841446231662e-06, "loss": 17.9129, "step": 18496 }, { "epoch": 0.33811029667135833, "grad_norm": 6.672298055295145, "learning_rate": 7.710592711888473e-06, "loss": 17.546, "step": 18497 }, { "epoch": 0.33812857586780487, "grad_norm": 5.8147908282243925, "learning_rate": 7.710343968044893e-06, "loss": 17.1957, "step": 18498 }, { "epoch": 0.33814685506425135, "grad_norm": 7.871496156274192, "learning_rate": 7.710095214701792e-06, "loss": 17.8094, "step": 18499 }, { "epoch": 0.3381651342606979, "grad_norm": 8.4109188978225, "learning_rate": 7.709846451860044e-06, "loss": 18.0162, "step": 18500 }, { "epoch": 0.3381834134571444, "grad_norm": 5.2937680106235705, "learning_rate": 7.709597679520517e-06, "loss": 17.0771, "step": 18501 }, { "epoch": 0.33820169265359096, "grad_norm": 6.379321046963044, "learning_rate": 7.709348897684087e-06, "loss": 17.7056, "step": 18502 }, { "epoch": 0.3382199718500375, "grad_norm": 7.265112552027738, "learning_rate": 7.709100106351624e-06, "loss": 17.5608, "step": 18503 }, { "epoch": 0.338238251046484, "grad_norm": 6.226972182198487, "learning_rate": 7.708851305523999e-06, "loss": 17.03, "step": 18504 }, { "epoch": 0.3382565302429305, "grad_norm": 6.515094573049991, "learning_rate": 7.708602495202085e-06, "loss": 17.4419, "step": 18505 }, { "epoch": 0.33827480943937704, "grad_norm": 5.828203314056155, "learning_rate": 7.708353675386756e-06, "loss": 17.4523, "step": 18506 }, { "epoch": 0.3382930886358236, "grad_norm": 7.433264374469968, "learning_rate": 7.70810484607888e-06, "loss": 17.8551, "step": 18507 }, { "epoch": 0.3383113678322701, "grad_norm": 6.452021201753046, "learning_rate": 7.707856007279336e-06, "loss": 17.5515, "step": 18508 }, { "epoch": 0.3383296470287166, "grad_norm": 6.927308323353563, "learning_rate": 7.707607158988989e-06, "loss": 17.6572, "step": 18509 }, { "epoch": 0.33834792622516313, "grad_norm": 7.59364801967737, "learning_rate": 7.707358301208711e-06, "loss": 18.0751, "step": 18510 }, { "epoch": 0.33836620542160967, "grad_norm": 8.481590889263641, "learning_rate": 7.70710943393938e-06, "loss": 18.4459, "step": 18511 }, { "epoch": 0.3383844846180562, "grad_norm": 6.218440313046197, "learning_rate": 7.706860557181865e-06, "loss": 17.4924, "step": 18512 }, { "epoch": 0.33840276381450274, "grad_norm": 7.873215963953774, "learning_rate": 7.70661167093704e-06, "loss": 17.8562, "step": 18513 }, { "epoch": 0.3384210430109492, "grad_norm": 5.718721767784067, "learning_rate": 7.706362775205775e-06, "loss": 17.4298, "step": 18514 }, { "epoch": 0.33843932220739575, "grad_norm": 6.855804305720729, "learning_rate": 7.706113869988942e-06, "loss": 17.7369, "step": 18515 }, { "epoch": 0.3384576014038423, "grad_norm": 6.285208999963029, "learning_rate": 7.705864955287417e-06, "loss": 17.4317, "step": 18516 }, { "epoch": 0.3384758806002888, "grad_norm": 6.876274856684372, "learning_rate": 7.705616031102067e-06, "loss": 17.7482, "step": 18517 }, { "epoch": 0.33849415979673536, "grad_norm": 6.551674808949516, "learning_rate": 7.70536709743377e-06, "loss": 17.5721, "step": 18518 }, { "epoch": 0.33851243899318184, "grad_norm": 7.1792626870622795, "learning_rate": 7.705118154283395e-06, "loss": 17.81, "step": 18519 }, { "epoch": 0.3385307181896284, "grad_norm": 6.532119534364582, "learning_rate": 7.704869201651817e-06, "loss": 17.4447, "step": 18520 }, { "epoch": 0.3385489973860749, "grad_norm": 7.836061959256069, "learning_rate": 7.704620239539907e-06, "loss": 18.0364, "step": 18521 }, { "epoch": 0.33856727658252145, "grad_norm": 7.278440436296791, "learning_rate": 7.704371267948537e-06, "loss": 17.9914, "step": 18522 }, { "epoch": 0.338585555778968, "grad_norm": 9.443688365625592, "learning_rate": 7.704122286878582e-06, "loss": 19.0627, "step": 18523 }, { "epoch": 0.33860383497541446, "grad_norm": 6.7285697217231775, "learning_rate": 7.703873296330911e-06, "loss": 17.7257, "step": 18524 }, { "epoch": 0.338622114171861, "grad_norm": 6.832079061969127, "learning_rate": 7.703624296306398e-06, "loss": 17.8432, "step": 18525 }, { "epoch": 0.33864039336830754, "grad_norm": 7.372634881950305, "learning_rate": 7.703375286805918e-06, "loss": 18.1717, "step": 18526 }, { "epoch": 0.33865867256475407, "grad_norm": 6.671263012834701, "learning_rate": 7.703126267830341e-06, "loss": 17.832, "step": 18527 }, { "epoch": 0.33867695176120055, "grad_norm": 8.064692606205229, "learning_rate": 7.702877239380541e-06, "loss": 18.3962, "step": 18528 }, { "epoch": 0.3386952309576471, "grad_norm": 5.9140910909686655, "learning_rate": 7.702628201457393e-06, "loss": 17.543, "step": 18529 }, { "epoch": 0.3387135101540936, "grad_norm": 7.536298645706425, "learning_rate": 7.702379154061766e-06, "loss": 18.0075, "step": 18530 }, { "epoch": 0.33873178935054016, "grad_norm": 6.903730155055247, "learning_rate": 7.702130097194536e-06, "loss": 17.737, "step": 18531 }, { "epoch": 0.3387500685469867, "grad_norm": 6.892784737799518, "learning_rate": 7.701881030856573e-06, "loss": 17.8239, "step": 18532 }, { "epoch": 0.3387683477434332, "grad_norm": 7.989731384844175, "learning_rate": 7.701631955048751e-06, "loss": 18.1867, "step": 18533 }, { "epoch": 0.3387866269398797, "grad_norm": 6.75314577527341, "learning_rate": 7.701382869771944e-06, "loss": 17.6316, "step": 18534 }, { "epoch": 0.33880490613632624, "grad_norm": 6.425552269052934, "learning_rate": 7.701133775027026e-06, "loss": 17.7064, "step": 18535 }, { "epoch": 0.3388231853327728, "grad_norm": 6.475145211434733, "learning_rate": 7.700884670814867e-06, "loss": 17.5869, "step": 18536 }, { "epoch": 0.3388414645292193, "grad_norm": 7.979887064014127, "learning_rate": 7.700635557136342e-06, "loss": 17.9246, "step": 18537 }, { "epoch": 0.3388597437256658, "grad_norm": 7.1966159183684155, "learning_rate": 7.700386433992325e-06, "loss": 17.6055, "step": 18538 }, { "epoch": 0.33887802292211233, "grad_norm": 5.8361565872888335, "learning_rate": 7.700137301383685e-06, "loss": 17.3603, "step": 18539 }, { "epoch": 0.33889630211855887, "grad_norm": 6.800728664016093, "learning_rate": 7.6998881593113e-06, "loss": 17.8276, "step": 18540 }, { "epoch": 0.3389145813150054, "grad_norm": 6.969027315962583, "learning_rate": 7.69963900777604e-06, "loss": 18.0461, "step": 18541 }, { "epoch": 0.33893286051145194, "grad_norm": 7.748699637922752, "learning_rate": 7.699389846778781e-06, "loss": 18.1258, "step": 18542 }, { "epoch": 0.3389511397078984, "grad_norm": 7.299936552345248, "learning_rate": 7.699140676320394e-06, "loss": 17.5742, "step": 18543 }, { "epoch": 0.33896941890434495, "grad_norm": 5.406616937955294, "learning_rate": 7.698891496401753e-06, "loss": 17.1552, "step": 18544 }, { "epoch": 0.3389876981007915, "grad_norm": 6.105185122653779, "learning_rate": 7.698642307023732e-06, "loss": 17.3877, "step": 18545 }, { "epoch": 0.339005977297238, "grad_norm": 6.070445457980878, "learning_rate": 7.698393108187203e-06, "loss": 17.433, "step": 18546 }, { "epoch": 0.33902425649368456, "grad_norm": 7.327904658034851, "learning_rate": 7.698143899893042e-06, "loss": 18.2264, "step": 18547 }, { "epoch": 0.33904253569013104, "grad_norm": 5.811399519134845, "learning_rate": 7.697894682142119e-06, "loss": 17.2927, "step": 18548 }, { "epoch": 0.3390608148865776, "grad_norm": 8.371710481814059, "learning_rate": 7.697645454935311e-06, "loss": 18.5259, "step": 18549 }, { "epoch": 0.3390790940830241, "grad_norm": 6.162236942618617, "learning_rate": 7.697396218273488e-06, "loss": 17.4977, "step": 18550 }, { "epoch": 0.33909737327947065, "grad_norm": 5.667929160973008, "learning_rate": 7.697146972157527e-06, "loss": 17.2238, "step": 18551 }, { "epoch": 0.3391156524759172, "grad_norm": 6.3630864987232725, "learning_rate": 7.696897716588299e-06, "loss": 17.5051, "step": 18552 }, { "epoch": 0.33913393167236366, "grad_norm": 5.547956065392964, "learning_rate": 7.696648451566678e-06, "loss": 17.2606, "step": 18553 }, { "epoch": 0.3391522108688102, "grad_norm": 5.693365817578991, "learning_rate": 7.696399177093539e-06, "loss": 17.2777, "step": 18554 }, { "epoch": 0.33917049006525674, "grad_norm": 7.699578961192763, "learning_rate": 7.696149893169756e-06, "loss": 18.1077, "step": 18555 }, { "epoch": 0.33918876926170327, "grad_norm": 7.370075956908817, "learning_rate": 7.6959005997962e-06, "loss": 17.7865, "step": 18556 }, { "epoch": 0.3392070484581498, "grad_norm": 6.320802877160347, "learning_rate": 7.695651296973745e-06, "loss": 17.6819, "step": 18557 }, { "epoch": 0.3392253276545963, "grad_norm": 7.172380059789501, "learning_rate": 7.695401984703269e-06, "loss": 17.9064, "step": 18558 }, { "epoch": 0.3392436068510428, "grad_norm": 5.8204236789504575, "learning_rate": 7.695152662985641e-06, "loss": 17.3202, "step": 18559 }, { "epoch": 0.33926188604748936, "grad_norm": 6.671811365467112, "learning_rate": 7.694903331821736e-06, "loss": 17.4557, "step": 18560 }, { "epoch": 0.3392801652439359, "grad_norm": 5.506696943152934, "learning_rate": 7.694653991212431e-06, "loss": 17.1238, "step": 18561 }, { "epoch": 0.3392984444403824, "grad_norm": 6.6946185875147926, "learning_rate": 7.694404641158597e-06, "loss": 18.0571, "step": 18562 }, { "epoch": 0.3393167236368289, "grad_norm": 5.7400999723436295, "learning_rate": 7.694155281661108e-06, "loss": 17.1185, "step": 18563 }, { "epoch": 0.33933500283327545, "grad_norm": 4.560350280967302, "learning_rate": 7.69390591272084e-06, "loss": 16.7462, "step": 18564 }, { "epoch": 0.339353282029722, "grad_norm": 6.779489856870191, "learning_rate": 7.693656534338665e-06, "loss": 17.9682, "step": 18565 }, { "epoch": 0.3393715612261685, "grad_norm": 6.055479736217653, "learning_rate": 7.693407146515455e-06, "loss": 17.3872, "step": 18566 }, { "epoch": 0.339389840422615, "grad_norm": 7.063314999171396, "learning_rate": 7.693157749252089e-06, "loss": 17.8399, "step": 18567 }, { "epoch": 0.33940811961906153, "grad_norm": 6.485390388814588, "learning_rate": 7.692908342549439e-06, "loss": 17.3437, "step": 18568 }, { "epoch": 0.33942639881550807, "grad_norm": 7.67912906130451, "learning_rate": 7.692658926408378e-06, "loss": 18.3269, "step": 18569 }, { "epoch": 0.3394446780119546, "grad_norm": 6.66874415783317, "learning_rate": 7.692409500829781e-06, "loss": 17.6165, "step": 18570 }, { "epoch": 0.33946295720840114, "grad_norm": 5.525745694311952, "learning_rate": 7.692160065814522e-06, "loss": 16.9587, "step": 18571 }, { "epoch": 0.3394812364048476, "grad_norm": 6.5776478903528135, "learning_rate": 7.691910621363479e-06, "loss": 17.9475, "step": 18572 }, { "epoch": 0.33949951560129416, "grad_norm": 6.735479974909261, "learning_rate": 7.691661167477519e-06, "loss": 17.3685, "step": 18573 }, { "epoch": 0.3395177947977407, "grad_norm": 6.103561192639743, "learning_rate": 7.691411704157521e-06, "loss": 17.369, "step": 18574 }, { "epoch": 0.3395360739941872, "grad_norm": 5.988450917113393, "learning_rate": 7.69116223140436e-06, "loss": 17.2464, "step": 18575 }, { "epoch": 0.33955435319063376, "grad_norm": 5.907624695401685, "learning_rate": 7.690912749218908e-06, "loss": 17.3719, "step": 18576 }, { "epoch": 0.33957263238708024, "grad_norm": 6.418615476685507, "learning_rate": 7.69066325760204e-06, "loss": 17.6757, "step": 18577 }, { "epoch": 0.3395909115835268, "grad_norm": 6.390374719797928, "learning_rate": 7.69041375655463e-06, "loss": 17.4991, "step": 18578 }, { "epoch": 0.3396091907799733, "grad_norm": 7.513673674936005, "learning_rate": 7.690164246077553e-06, "loss": 17.6994, "step": 18579 }, { "epoch": 0.33962746997641985, "grad_norm": 7.370235617724177, "learning_rate": 7.689914726171685e-06, "loss": 17.7465, "step": 18580 }, { "epoch": 0.3396457491728664, "grad_norm": 5.776963263010866, "learning_rate": 7.6896651968379e-06, "loss": 17.1914, "step": 18581 }, { "epoch": 0.33966402836931286, "grad_norm": 7.480861992419118, "learning_rate": 7.689415658077074e-06, "loss": 18.0238, "step": 18582 }, { "epoch": 0.3396823075657594, "grad_norm": 7.51474730627037, "learning_rate": 7.689166109890073e-06, "loss": 17.8101, "step": 18583 }, { "epoch": 0.33970058676220594, "grad_norm": 5.94084192894617, "learning_rate": 7.688916552277783e-06, "loss": 17.2131, "step": 18584 }, { "epoch": 0.33971886595865247, "grad_norm": 5.7606416869487616, "learning_rate": 7.688666985241074e-06, "loss": 17.5015, "step": 18585 }, { "epoch": 0.339737145155099, "grad_norm": 7.528853867532281, "learning_rate": 7.688417408780817e-06, "loss": 18.0463, "step": 18586 }, { "epoch": 0.3397554243515455, "grad_norm": 6.374259758307279, "learning_rate": 7.688167822897894e-06, "loss": 17.3193, "step": 18587 }, { "epoch": 0.339773703547992, "grad_norm": 7.187857336463657, "learning_rate": 7.687918227593174e-06, "loss": 17.9579, "step": 18588 }, { "epoch": 0.33979198274443856, "grad_norm": 6.771650370125248, "learning_rate": 7.687668622867535e-06, "loss": 17.7377, "step": 18589 }, { "epoch": 0.3398102619408851, "grad_norm": 6.478670704204277, "learning_rate": 7.687419008721848e-06, "loss": 17.6423, "step": 18590 }, { "epoch": 0.33982854113733163, "grad_norm": 5.801317083485911, "learning_rate": 7.687169385156994e-06, "loss": 17.2358, "step": 18591 }, { "epoch": 0.3398468203337781, "grad_norm": 5.778391106065269, "learning_rate": 7.686919752173842e-06, "loss": 17.1861, "step": 18592 }, { "epoch": 0.33986509953022465, "grad_norm": 5.4130668068485495, "learning_rate": 7.686670109773271e-06, "loss": 17.1547, "step": 18593 }, { "epoch": 0.3398833787266712, "grad_norm": 7.179857244364473, "learning_rate": 7.686420457956153e-06, "loss": 18.2276, "step": 18594 }, { "epoch": 0.3399016579231177, "grad_norm": 7.545135015636312, "learning_rate": 7.686170796723365e-06, "loss": 17.9744, "step": 18595 }, { "epoch": 0.3399199371195642, "grad_norm": 5.992206441478063, "learning_rate": 7.685921126075783e-06, "loss": 17.4739, "step": 18596 }, { "epoch": 0.33993821631601073, "grad_norm": 7.753257465418856, "learning_rate": 7.685671446014281e-06, "loss": 17.8907, "step": 18597 }, { "epoch": 0.33995649551245727, "grad_norm": 5.220488384198819, "learning_rate": 7.68542175653973e-06, "loss": 17.2379, "step": 18598 }, { "epoch": 0.3399747747089038, "grad_norm": 6.756246510999741, "learning_rate": 7.68517205765301e-06, "loss": 17.9904, "step": 18599 }, { "epoch": 0.33999305390535034, "grad_norm": 6.6665878497981295, "learning_rate": 7.684922349354997e-06, "loss": 17.666, "step": 18600 }, { "epoch": 0.3400113331017968, "grad_norm": 6.7635936038448685, "learning_rate": 7.684672631646561e-06, "loss": 17.6601, "step": 18601 }, { "epoch": 0.34002961229824336, "grad_norm": 6.019484555741808, "learning_rate": 7.684422904528584e-06, "loss": 17.4302, "step": 18602 }, { "epoch": 0.3400478914946899, "grad_norm": 6.587729455162058, "learning_rate": 7.684173168001936e-06, "loss": 17.6979, "step": 18603 }, { "epoch": 0.3400661706911364, "grad_norm": 6.695329373211095, "learning_rate": 7.683923422067492e-06, "loss": 17.7751, "step": 18604 }, { "epoch": 0.34008444988758296, "grad_norm": 5.599427833123826, "learning_rate": 7.683673666726133e-06, "loss": 17.3286, "step": 18605 }, { "epoch": 0.34010272908402944, "grad_norm": 6.655312439568152, "learning_rate": 7.683423901978729e-06, "loss": 17.6809, "step": 18606 }, { "epoch": 0.340121008280476, "grad_norm": 5.213666939523555, "learning_rate": 7.683174127826156e-06, "loss": 17.0193, "step": 18607 }, { "epoch": 0.3401392874769225, "grad_norm": 6.082647708732131, "learning_rate": 7.682924344269292e-06, "loss": 17.4358, "step": 18608 }, { "epoch": 0.34015756667336905, "grad_norm": 6.728008690159649, "learning_rate": 7.682674551309008e-06, "loss": 17.8505, "step": 18609 }, { "epoch": 0.3401758458698156, "grad_norm": 6.666065553449126, "learning_rate": 7.682424748946187e-06, "loss": 17.8174, "step": 18610 }, { "epoch": 0.34019412506626207, "grad_norm": 5.802111281189128, "learning_rate": 7.682174937181696e-06, "loss": 17.2524, "step": 18611 }, { "epoch": 0.3402124042627086, "grad_norm": 7.474876302335836, "learning_rate": 7.681925116016417e-06, "loss": 18.1691, "step": 18612 }, { "epoch": 0.34023068345915514, "grad_norm": 6.281967462298568, "learning_rate": 7.681675285451223e-06, "loss": 17.3999, "step": 18613 }, { "epoch": 0.34024896265560167, "grad_norm": 7.110678994979965, "learning_rate": 7.68142544548699e-06, "loss": 17.7144, "step": 18614 }, { "epoch": 0.3402672418520482, "grad_norm": 6.398574156336081, "learning_rate": 7.681175596124592e-06, "loss": 17.4233, "step": 18615 }, { "epoch": 0.3402855210484947, "grad_norm": 6.289630212886747, "learning_rate": 7.680925737364908e-06, "loss": 17.3901, "step": 18616 }, { "epoch": 0.3403038002449412, "grad_norm": 5.8380846989527155, "learning_rate": 7.680675869208811e-06, "loss": 17.1604, "step": 18617 }, { "epoch": 0.34032207944138776, "grad_norm": 5.569289614347202, "learning_rate": 7.680425991657177e-06, "loss": 17.0017, "step": 18618 }, { "epoch": 0.3403403586378343, "grad_norm": 6.144307323923875, "learning_rate": 7.680176104710883e-06, "loss": 17.4247, "step": 18619 }, { "epoch": 0.34035863783428083, "grad_norm": 6.459215526872056, "learning_rate": 7.679926208370807e-06, "loss": 17.6302, "step": 18620 }, { "epoch": 0.3403769170307273, "grad_norm": 5.93392102819098, "learning_rate": 7.679676302637818e-06, "loss": 17.2792, "step": 18621 }, { "epoch": 0.34039519622717385, "grad_norm": 7.4960631324250215, "learning_rate": 7.679426387512799e-06, "loss": 17.7464, "step": 18622 }, { "epoch": 0.3404134754236204, "grad_norm": 8.31714979088079, "learning_rate": 7.679176462996622e-06, "loss": 18.3638, "step": 18623 }, { "epoch": 0.3404317546200669, "grad_norm": 5.6869215349779525, "learning_rate": 7.678926529090164e-06, "loss": 17.3644, "step": 18624 }, { "epoch": 0.34045003381651345, "grad_norm": 9.132516392454006, "learning_rate": 7.6786765857943e-06, "loss": 18.0032, "step": 18625 }, { "epoch": 0.34046831301295993, "grad_norm": 6.52480229562743, "learning_rate": 7.678426633109908e-06, "loss": 17.6034, "step": 18626 }, { "epoch": 0.34048659220940647, "grad_norm": 8.39836950754598, "learning_rate": 7.678176671037864e-06, "loss": 18.007, "step": 18627 }, { "epoch": 0.340504871405853, "grad_norm": 5.796424602940962, "learning_rate": 7.677926699579042e-06, "loss": 17.2748, "step": 18628 }, { "epoch": 0.34052315060229954, "grad_norm": 6.627960521855117, "learning_rate": 7.677676718734319e-06, "loss": 17.6418, "step": 18629 }, { "epoch": 0.340541429798746, "grad_norm": 7.443410950212804, "learning_rate": 7.677426728504572e-06, "loss": 18.0215, "step": 18630 }, { "epoch": 0.34055970899519256, "grad_norm": 6.474408461785655, "learning_rate": 7.677176728890677e-06, "loss": 17.5713, "step": 18631 }, { "epoch": 0.3405779881916391, "grad_norm": 7.853435968222706, "learning_rate": 7.676926719893509e-06, "loss": 18.0385, "step": 18632 }, { "epoch": 0.3405962673880856, "grad_norm": 7.4119833741467795, "learning_rate": 7.676676701513945e-06, "loss": 17.9976, "step": 18633 }, { "epoch": 0.34061454658453216, "grad_norm": 7.26715653733841, "learning_rate": 7.676426673752862e-06, "loss": 18.1636, "step": 18634 }, { "epoch": 0.34063282578097864, "grad_norm": 7.406503317562208, "learning_rate": 7.676176636611137e-06, "loss": 17.742, "step": 18635 }, { "epoch": 0.3406511049774252, "grad_norm": 6.3999819331489585, "learning_rate": 7.675926590089643e-06, "loss": 17.6915, "step": 18636 }, { "epoch": 0.3406693841738717, "grad_norm": 7.352261573316151, "learning_rate": 7.675676534189261e-06, "loss": 17.7272, "step": 18637 }, { "epoch": 0.34068766337031825, "grad_norm": 6.6079238737001695, "learning_rate": 7.675426468910862e-06, "loss": 17.4509, "step": 18638 }, { "epoch": 0.3407059425667648, "grad_norm": 5.703601467050324, "learning_rate": 7.675176394255326e-06, "loss": 17.4343, "step": 18639 }, { "epoch": 0.34072422176321127, "grad_norm": 7.165661530921426, "learning_rate": 7.67492631022353e-06, "loss": 17.945, "step": 18640 }, { "epoch": 0.3407425009596578, "grad_norm": 5.0921407244045165, "learning_rate": 7.67467621681635e-06, "loss": 16.9564, "step": 18641 }, { "epoch": 0.34076078015610434, "grad_norm": 7.055686350188954, "learning_rate": 7.674426114034662e-06, "loss": 18.0199, "step": 18642 }, { "epoch": 0.3407790593525509, "grad_norm": 6.803755903484869, "learning_rate": 7.67417600187934e-06, "loss": 17.4164, "step": 18643 }, { "epoch": 0.3407973385489974, "grad_norm": 7.2652829283862514, "learning_rate": 7.673925880351266e-06, "loss": 17.7704, "step": 18644 }, { "epoch": 0.3408156177454439, "grad_norm": 7.275087787321198, "learning_rate": 7.673675749451312e-06, "loss": 17.8544, "step": 18645 }, { "epoch": 0.3408338969418904, "grad_norm": 6.113747157958731, "learning_rate": 7.673425609180356e-06, "loss": 17.4012, "step": 18646 }, { "epoch": 0.34085217613833696, "grad_norm": 5.939387750543694, "learning_rate": 7.673175459539277e-06, "loss": 17.0685, "step": 18647 }, { "epoch": 0.3408704553347835, "grad_norm": 5.721475154394008, "learning_rate": 7.672925300528949e-06, "loss": 17.1819, "step": 18648 }, { "epoch": 0.34088873453123003, "grad_norm": 6.697699313686008, "learning_rate": 7.672675132150249e-06, "loss": 17.3943, "step": 18649 }, { "epoch": 0.3409070137276765, "grad_norm": 6.393508358343627, "learning_rate": 7.672424954404057e-06, "loss": 17.5696, "step": 18650 }, { "epoch": 0.34092529292412305, "grad_norm": 8.07438924970571, "learning_rate": 7.672174767291246e-06, "loss": 17.8587, "step": 18651 }, { "epoch": 0.3409435721205696, "grad_norm": 6.642235116480529, "learning_rate": 7.671924570812694e-06, "loss": 17.6111, "step": 18652 }, { "epoch": 0.3409618513170161, "grad_norm": 6.590922537754951, "learning_rate": 7.671674364969277e-06, "loss": 17.4407, "step": 18653 }, { "epoch": 0.34098013051346265, "grad_norm": 5.966974561777243, "learning_rate": 7.671424149761878e-06, "loss": 17.4277, "step": 18654 }, { "epoch": 0.34099840970990913, "grad_norm": 7.563680802508155, "learning_rate": 7.671173925191364e-06, "loss": 17.7721, "step": 18655 }, { "epoch": 0.34101668890635567, "grad_norm": 6.201240177848574, "learning_rate": 7.670923691258619e-06, "loss": 17.2041, "step": 18656 }, { "epoch": 0.3410349681028022, "grad_norm": 6.99645951902213, "learning_rate": 7.670673447964518e-06, "loss": 17.6827, "step": 18657 }, { "epoch": 0.34105324729924874, "grad_norm": 6.715923558615538, "learning_rate": 7.67042319530994e-06, "loss": 17.2222, "step": 18658 }, { "epoch": 0.3410715264956953, "grad_norm": 9.084080103438504, "learning_rate": 7.670172933295758e-06, "loss": 18.4638, "step": 18659 }, { "epoch": 0.34108980569214176, "grad_norm": 6.348419974733283, "learning_rate": 7.669922661922853e-06, "loss": 17.379, "step": 18660 }, { "epoch": 0.3411080848885883, "grad_norm": 8.42640356163809, "learning_rate": 7.6696723811921e-06, "loss": 18.2419, "step": 18661 }, { "epoch": 0.34112636408503483, "grad_norm": 6.340744017100043, "learning_rate": 7.669422091104377e-06, "loss": 17.2227, "step": 18662 }, { "epoch": 0.34114464328148136, "grad_norm": 6.583856869218941, "learning_rate": 7.669171791660562e-06, "loss": 17.5243, "step": 18663 }, { "epoch": 0.34116292247792784, "grad_norm": 6.765985311311577, "learning_rate": 7.668921482861531e-06, "loss": 17.6967, "step": 18664 }, { "epoch": 0.3411812016743744, "grad_norm": 7.855367461729398, "learning_rate": 7.668671164708163e-06, "loss": 18.0338, "step": 18665 }, { "epoch": 0.3411994808708209, "grad_norm": 5.842699478441956, "learning_rate": 7.668420837201331e-06, "loss": 17.3604, "step": 18666 }, { "epoch": 0.34121776006726745, "grad_norm": 5.647097215030044, "learning_rate": 7.668170500341918e-06, "loss": 17.3587, "step": 18667 }, { "epoch": 0.341236039263714, "grad_norm": 6.117666474122066, "learning_rate": 7.6679201541308e-06, "loss": 17.5845, "step": 18668 }, { "epoch": 0.34125431846016047, "grad_norm": 6.599540683177623, "learning_rate": 7.667669798568852e-06, "loss": 17.6067, "step": 18669 }, { "epoch": 0.341272597656607, "grad_norm": 6.819116057737852, "learning_rate": 7.667419433656953e-06, "loss": 17.8113, "step": 18670 }, { "epoch": 0.34129087685305354, "grad_norm": 6.718181840431996, "learning_rate": 7.667169059395978e-06, "loss": 17.6825, "step": 18671 }, { "epoch": 0.3413091560495001, "grad_norm": 6.805742840354403, "learning_rate": 7.66691867578681e-06, "loss": 17.7864, "step": 18672 }, { "epoch": 0.3413274352459466, "grad_norm": 7.512941050983491, "learning_rate": 7.666668282830323e-06, "loss": 18.1333, "step": 18673 }, { "epoch": 0.3413457144423931, "grad_norm": 6.473133563595723, "learning_rate": 7.666417880527395e-06, "loss": 17.5756, "step": 18674 }, { "epoch": 0.3413639936388396, "grad_norm": 6.226849326535687, "learning_rate": 7.666167468878902e-06, "loss": 17.576, "step": 18675 }, { "epoch": 0.34138227283528616, "grad_norm": 6.436126671583582, "learning_rate": 7.665917047885724e-06, "loss": 17.4472, "step": 18676 }, { "epoch": 0.3414005520317327, "grad_norm": 5.675453461661069, "learning_rate": 7.665666617548739e-06, "loss": 17.3, "step": 18677 }, { "epoch": 0.34141883122817923, "grad_norm": 6.821599837399781, "learning_rate": 7.665416177868825e-06, "loss": 17.8204, "step": 18678 }, { "epoch": 0.3414371104246257, "grad_norm": 7.783969477680731, "learning_rate": 7.665165728846857e-06, "loss": 18.1205, "step": 18679 }, { "epoch": 0.34145538962107225, "grad_norm": 6.7448408250391045, "learning_rate": 7.664915270483713e-06, "loss": 17.6446, "step": 18680 }, { "epoch": 0.3414736688175188, "grad_norm": 5.673997915867822, "learning_rate": 7.664664802780275e-06, "loss": 17.0912, "step": 18681 }, { "epoch": 0.3414919480139653, "grad_norm": 6.27327755016016, "learning_rate": 7.664414325737417e-06, "loss": 17.4395, "step": 18682 }, { "epoch": 0.34151022721041185, "grad_norm": 7.16947317453992, "learning_rate": 7.664163839356017e-06, "loss": 18.1252, "step": 18683 }, { "epoch": 0.34152850640685833, "grad_norm": 5.995978383754804, "learning_rate": 7.663913343636955e-06, "loss": 17.2703, "step": 18684 }, { "epoch": 0.34154678560330487, "grad_norm": 6.606834029652045, "learning_rate": 7.663662838581107e-06, "loss": 17.4672, "step": 18685 }, { "epoch": 0.3415650647997514, "grad_norm": 5.280484714656908, "learning_rate": 7.663412324189353e-06, "loss": 16.9884, "step": 18686 }, { "epoch": 0.34158334399619794, "grad_norm": 7.013647159691276, "learning_rate": 7.663161800462569e-06, "loss": 17.7756, "step": 18687 }, { "epoch": 0.3416016231926445, "grad_norm": 7.220249667939323, "learning_rate": 7.662911267401634e-06, "loss": 17.4257, "step": 18688 }, { "epoch": 0.34161990238909096, "grad_norm": 6.068767553887193, "learning_rate": 7.662660725007427e-06, "loss": 17.4268, "step": 18689 }, { "epoch": 0.3416381815855375, "grad_norm": 6.104009893408434, "learning_rate": 7.662410173280825e-06, "loss": 17.5837, "step": 18690 }, { "epoch": 0.34165646078198403, "grad_norm": 6.684856497049956, "learning_rate": 7.662159612222706e-06, "loss": 17.843, "step": 18691 }, { "epoch": 0.34167473997843056, "grad_norm": 6.654655871948643, "learning_rate": 7.661909041833951e-06, "loss": 17.4879, "step": 18692 }, { "epoch": 0.3416930191748771, "grad_norm": 5.581902799086931, "learning_rate": 7.661658462115431e-06, "loss": 17.2192, "step": 18693 }, { "epoch": 0.3417112983713236, "grad_norm": 6.939167635113642, "learning_rate": 7.661407873068031e-06, "loss": 17.7619, "step": 18694 }, { "epoch": 0.3417295775677701, "grad_norm": 6.485193863234857, "learning_rate": 7.661157274692628e-06, "loss": 17.4223, "step": 18695 }, { "epoch": 0.34174785676421665, "grad_norm": 6.644593736895018, "learning_rate": 7.660906666990102e-06, "loss": 17.7662, "step": 18696 }, { "epoch": 0.3417661359606632, "grad_norm": 5.50455822128651, "learning_rate": 7.660656049961326e-06, "loss": 17.0274, "step": 18697 }, { "epoch": 0.34178441515710967, "grad_norm": 6.24026387829265, "learning_rate": 7.66040542360718e-06, "loss": 17.3114, "step": 18698 }, { "epoch": 0.3418026943535562, "grad_norm": 6.941206257786055, "learning_rate": 7.660154787928546e-06, "loss": 18.088, "step": 18699 }, { "epoch": 0.34182097355000274, "grad_norm": 7.010808293183774, "learning_rate": 7.659904142926302e-06, "loss": 17.6137, "step": 18700 }, { "epoch": 0.3418392527464493, "grad_norm": 5.953407799406823, "learning_rate": 7.659653488601322e-06, "loss": 17.2847, "step": 18701 }, { "epoch": 0.3418575319428958, "grad_norm": 6.3305612939514555, "learning_rate": 7.659402824954488e-06, "loss": 17.7074, "step": 18702 }, { "epoch": 0.3418758111393423, "grad_norm": 6.868297075420189, "learning_rate": 7.659152151986679e-06, "loss": 17.529, "step": 18703 }, { "epoch": 0.3418940903357888, "grad_norm": 6.883515930230535, "learning_rate": 7.658901469698771e-06, "loss": 17.5927, "step": 18704 }, { "epoch": 0.34191236953223536, "grad_norm": 5.387613705367663, "learning_rate": 7.658650778091645e-06, "loss": 17.0186, "step": 18705 }, { "epoch": 0.3419306487286819, "grad_norm": 7.590193402357873, "learning_rate": 7.658400077166178e-06, "loss": 18.2854, "step": 18706 }, { "epoch": 0.34194892792512843, "grad_norm": 5.918681654641845, "learning_rate": 7.658149366923249e-06, "loss": 17.1875, "step": 18707 }, { "epoch": 0.3419672071215749, "grad_norm": 6.065267697725173, "learning_rate": 7.65789864736374e-06, "loss": 17.3768, "step": 18708 }, { "epoch": 0.34198548631802145, "grad_norm": 6.035988272592032, "learning_rate": 7.657647918488523e-06, "loss": 17.473, "step": 18709 }, { "epoch": 0.342003765514468, "grad_norm": 7.538219994187357, "learning_rate": 7.657397180298483e-06, "loss": 17.6449, "step": 18710 }, { "epoch": 0.3420220447109145, "grad_norm": 8.440697458735569, "learning_rate": 7.657146432794496e-06, "loss": 18.4291, "step": 18711 }, { "epoch": 0.34204032390736105, "grad_norm": 7.200546142422039, "learning_rate": 7.65689567597744e-06, "loss": 17.6517, "step": 18712 }, { "epoch": 0.34205860310380753, "grad_norm": 7.796034028720834, "learning_rate": 7.656644909848198e-06, "loss": 17.9615, "step": 18713 }, { "epoch": 0.34207688230025407, "grad_norm": 5.797347903855568, "learning_rate": 7.656394134407646e-06, "loss": 17.3534, "step": 18714 }, { "epoch": 0.3420951614967006, "grad_norm": 6.328034515254491, "learning_rate": 7.656143349656661e-06, "loss": 17.3153, "step": 18715 }, { "epoch": 0.34211344069314714, "grad_norm": 8.291240342910957, "learning_rate": 7.655892555596124e-06, "loss": 18.3262, "step": 18716 }, { "epoch": 0.3421317198895937, "grad_norm": 7.436553844779456, "learning_rate": 7.655641752226915e-06, "loss": 17.8486, "step": 18717 }, { "epoch": 0.34214999908604016, "grad_norm": 6.385686728345363, "learning_rate": 7.655390939549911e-06, "loss": 17.9412, "step": 18718 }, { "epoch": 0.3421682782824867, "grad_norm": 7.330562144678844, "learning_rate": 7.655140117565995e-06, "loss": 17.9242, "step": 18719 }, { "epoch": 0.34218655747893323, "grad_norm": 7.543387211435891, "learning_rate": 7.65488928627604e-06, "loss": 18.0056, "step": 18720 }, { "epoch": 0.34220483667537976, "grad_norm": 7.81850260618639, "learning_rate": 7.65463844568093e-06, "loss": 17.9146, "step": 18721 }, { "epoch": 0.3422231158718263, "grad_norm": 5.5160302544833995, "learning_rate": 7.654387595781542e-06, "loss": 17.0875, "step": 18722 }, { "epoch": 0.3422413950682728, "grad_norm": 6.8386345834937305, "learning_rate": 7.654136736578759e-06, "loss": 17.4698, "step": 18723 }, { "epoch": 0.3422596742647193, "grad_norm": 6.12700821599637, "learning_rate": 7.653885868073454e-06, "loss": 17.195, "step": 18724 }, { "epoch": 0.34227795346116585, "grad_norm": 5.048266219225038, "learning_rate": 7.653634990266507e-06, "loss": 17.0995, "step": 18725 }, { "epoch": 0.3422962326576124, "grad_norm": 7.152460909063966, "learning_rate": 7.653384103158802e-06, "loss": 17.6174, "step": 18726 }, { "epoch": 0.3423145118540589, "grad_norm": 5.968102892944309, "learning_rate": 7.653133206751218e-06, "loss": 17.3426, "step": 18727 }, { "epoch": 0.3423327910505054, "grad_norm": 8.25861894424999, "learning_rate": 7.65288230104463e-06, "loss": 18.1985, "step": 18728 }, { "epoch": 0.34235107024695194, "grad_norm": 7.8130738972777465, "learning_rate": 7.652631386039921e-06, "loss": 17.7325, "step": 18729 }, { "epoch": 0.3423693494433985, "grad_norm": 7.035678726252646, "learning_rate": 7.65238046173797e-06, "loss": 18.0212, "step": 18730 }, { "epoch": 0.342387628639845, "grad_norm": 6.6801113104921805, "learning_rate": 7.652129528139654e-06, "loss": 17.6916, "step": 18731 }, { "epoch": 0.3424059078362915, "grad_norm": 7.156947819969368, "learning_rate": 7.651878585245853e-06, "loss": 18.0317, "step": 18732 }, { "epoch": 0.342424187032738, "grad_norm": 7.582022893941064, "learning_rate": 7.65162763305745e-06, "loss": 17.6248, "step": 18733 }, { "epoch": 0.34244246622918456, "grad_norm": 7.9573283114692295, "learning_rate": 7.65137667157532e-06, "loss": 18.3825, "step": 18734 }, { "epoch": 0.3424607454256311, "grad_norm": 5.623300634353293, "learning_rate": 7.651125700800346e-06, "loss": 17.1096, "step": 18735 }, { "epoch": 0.34247902462207763, "grad_norm": 6.6412611985181265, "learning_rate": 7.650874720733407e-06, "loss": 17.4568, "step": 18736 }, { "epoch": 0.3424973038185241, "grad_norm": 5.04614504841508, "learning_rate": 7.650623731375381e-06, "loss": 16.9781, "step": 18737 }, { "epoch": 0.34251558301497065, "grad_norm": 6.510875943565755, "learning_rate": 7.65037273272715e-06, "loss": 17.628, "step": 18738 }, { "epoch": 0.3425338622114172, "grad_norm": 6.4333687956937515, "learning_rate": 7.650121724789592e-06, "loss": 17.8449, "step": 18739 }, { "epoch": 0.3425521414078637, "grad_norm": 6.963383029694077, "learning_rate": 7.649870707563588e-06, "loss": 17.674, "step": 18740 }, { "epoch": 0.34257042060431026, "grad_norm": 7.182635781943536, "learning_rate": 7.649619681050015e-06, "loss": 17.9128, "step": 18741 }, { "epoch": 0.34258869980075674, "grad_norm": 8.654101903838043, "learning_rate": 7.649368645249757e-06, "loss": 18.2864, "step": 18742 }, { "epoch": 0.34260697899720327, "grad_norm": 6.562410099438556, "learning_rate": 7.649117600163691e-06, "loss": 17.5895, "step": 18743 }, { "epoch": 0.3426252581936498, "grad_norm": 7.61451345911573, "learning_rate": 7.648866545792699e-06, "loss": 17.8557, "step": 18744 }, { "epoch": 0.34264353739009634, "grad_norm": 7.150796271708187, "learning_rate": 7.648615482137658e-06, "loss": 17.9896, "step": 18745 }, { "epoch": 0.3426618165865429, "grad_norm": 6.534132170946157, "learning_rate": 7.64836440919945e-06, "loss": 17.8412, "step": 18746 }, { "epoch": 0.34268009578298936, "grad_norm": 6.674528048805177, "learning_rate": 7.648113326978954e-06, "loss": 17.5252, "step": 18747 }, { "epoch": 0.3426983749794359, "grad_norm": 5.492376445780663, "learning_rate": 7.647862235477053e-06, "loss": 17.1936, "step": 18748 }, { "epoch": 0.34271665417588243, "grad_norm": 5.219124691380864, "learning_rate": 7.647611134694621e-06, "loss": 17.003, "step": 18749 }, { "epoch": 0.34273493337232896, "grad_norm": 7.492574678745267, "learning_rate": 7.647360024632544e-06, "loss": 17.9828, "step": 18750 }, { "epoch": 0.3427532125687755, "grad_norm": 5.298951135931565, "learning_rate": 7.6471089052917e-06, "loss": 16.8713, "step": 18751 }, { "epoch": 0.342771491765222, "grad_norm": 6.2397662614587945, "learning_rate": 7.646857776672968e-06, "loss": 17.5835, "step": 18752 }, { "epoch": 0.3427897709616685, "grad_norm": 5.97676886984631, "learning_rate": 7.64660663877723e-06, "loss": 17.6145, "step": 18753 }, { "epoch": 0.34280805015811505, "grad_norm": 5.447429576276824, "learning_rate": 7.646355491605367e-06, "loss": 17.1826, "step": 18754 }, { "epoch": 0.3428263293545616, "grad_norm": 5.293841771378851, "learning_rate": 7.646104335158254e-06, "loss": 17.2044, "step": 18755 }, { "epoch": 0.3428446085510081, "grad_norm": 5.762019716466999, "learning_rate": 7.645853169436777e-06, "loss": 17.3198, "step": 18756 }, { "epoch": 0.3428628877474546, "grad_norm": 6.274008540598406, "learning_rate": 7.645601994441813e-06, "loss": 17.3941, "step": 18757 }, { "epoch": 0.34288116694390114, "grad_norm": 6.880523485854208, "learning_rate": 7.645350810174245e-06, "loss": 17.8719, "step": 18758 }, { "epoch": 0.3428994461403477, "grad_norm": 8.122983849837068, "learning_rate": 7.64509961663495e-06, "loss": 17.9244, "step": 18759 }, { "epoch": 0.3429177253367942, "grad_norm": 6.032228247568009, "learning_rate": 7.644848413824812e-06, "loss": 17.1918, "step": 18760 }, { "epoch": 0.34293600453324075, "grad_norm": 6.7114781849518925, "learning_rate": 7.64459720174471e-06, "loss": 17.6641, "step": 18761 }, { "epoch": 0.3429542837296872, "grad_norm": 5.099540818065632, "learning_rate": 7.644345980395524e-06, "loss": 16.9746, "step": 18762 }, { "epoch": 0.34297256292613376, "grad_norm": 7.0862622411338645, "learning_rate": 7.644094749778134e-06, "loss": 17.9195, "step": 18763 }, { "epoch": 0.3429908421225803, "grad_norm": 6.6467858690198405, "learning_rate": 7.643843509893423e-06, "loss": 17.7148, "step": 18764 }, { "epoch": 0.34300912131902683, "grad_norm": 6.181743736946362, "learning_rate": 7.64359226074227e-06, "loss": 17.5138, "step": 18765 }, { "epoch": 0.3430274005154733, "grad_norm": 7.299730676973299, "learning_rate": 7.643341002325553e-06, "loss": 17.8965, "step": 18766 }, { "epoch": 0.34304567971191985, "grad_norm": 6.007275784693551, "learning_rate": 7.643089734644157e-06, "loss": 17.513, "step": 18767 }, { "epoch": 0.3430639589083664, "grad_norm": 6.887004519072852, "learning_rate": 7.64283845769896e-06, "loss": 17.5114, "step": 18768 }, { "epoch": 0.3430822381048129, "grad_norm": 7.213322080699672, "learning_rate": 7.642587171490846e-06, "loss": 17.6282, "step": 18769 }, { "epoch": 0.34310051730125946, "grad_norm": 7.240584778713403, "learning_rate": 7.64233587602069e-06, "loss": 17.7969, "step": 18770 }, { "epoch": 0.34311879649770594, "grad_norm": 7.131732296849313, "learning_rate": 7.642084571289376e-06, "loss": 17.5967, "step": 18771 }, { "epoch": 0.34313707569415247, "grad_norm": 5.8936483822808405, "learning_rate": 7.641833257297788e-06, "loss": 17.2392, "step": 18772 }, { "epoch": 0.343155354890599, "grad_norm": 7.3028334644621475, "learning_rate": 7.641581934046802e-06, "loss": 17.7516, "step": 18773 }, { "epoch": 0.34317363408704554, "grad_norm": 6.259976548532674, "learning_rate": 7.6413306015373e-06, "loss": 17.2937, "step": 18774 }, { "epoch": 0.3431919132834921, "grad_norm": 7.36073248310213, "learning_rate": 7.641079259770163e-06, "loss": 17.9214, "step": 18775 }, { "epoch": 0.34321019247993856, "grad_norm": 7.563428694144747, "learning_rate": 7.640827908746274e-06, "loss": 17.849, "step": 18776 }, { "epoch": 0.3432284716763851, "grad_norm": 5.960784738086114, "learning_rate": 7.640576548466512e-06, "loss": 17.4236, "step": 18777 }, { "epoch": 0.34324675087283163, "grad_norm": 6.341646836969284, "learning_rate": 7.640325178931757e-06, "loss": 17.4697, "step": 18778 }, { "epoch": 0.34326503006927817, "grad_norm": 6.240868076821658, "learning_rate": 7.640073800142892e-06, "loss": 17.0821, "step": 18779 }, { "epoch": 0.3432833092657247, "grad_norm": 6.089649298515942, "learning_rate": 7.639822412100798e-06, "loss": 17.3529, "step": 18780 }, { "epoch": 0.3433015884621712, "grad_norm": 5.793944393176278, "learning_rate": 7.639571014806356e-06, "loss": 17.3753, "step": 18781 }, { "epoch": 0.3433198676586177, "grad_norm": 6.461406824451548, "learning_rate": 7.639319608260446e-06, "loss": 17.5282, "step": 18782 }, { "epoch": 0.34333814685506425, "grad_norm": 6.998204049448984, "learning_rate": 7.63906819246395e-06, "loss": 17.8295, "step": 18783 }, { "epoch": 0.3433564260515108, "grad_norm": 8.130001995420912, "learning_rate": 7.638816767417746e-06, "loss": 18.1857, "step": 18784 }, { "epoch": 0.3433747052479573, "grad_norm": 6.0315541071302485, "learning_rate": 7.638565333122721e-06, "loss": 17.1067, "step": 18785 }, { "epoch": 0.3433929844444038, "grad_norm": 8.11687880415702, "learning_rate": 7.638313889579754e-06, "loss": 18.4706, "step": 18786 }, { "epoch": 0.34341126364085034, "grad_norm": 6.595413154594915, "learning_rate": 7.638062436789726e-06, "loss": 17.3251, "step": 18787 }, { "epoch": 0.3434295428372969, "grad_norm": 6.549313801078654, "learning_rate": 7.637810974753517e-06, "loss": 17.6947, "step": 18788 }, { "epoch": 0.3434478220337434, "grad_norm": 5.937545933649113, "learning_rate": 7.637559503472009e-06, "loss": 17.1814, "step": 18789 }, { "epoch": 0.34346610123018995, "grad_norm": 6.059549672670603, "learning_rate": 7.637308022946084e-06, "loss": 17.2336, "step": 18790 }, { "epoch": 0.3434843804266364, "grad_norm": 6.964066984769655, "learning_rate": 7.637056533176625e-06, "loss": 17.7251, "step": 18791 }, { "epoch": 0.34350265962308296, "grad_norm": 8.46609148214759, "learning_rate": 7.636805034164511e-06, "loss": 18.3873, "step": 18792 }, { "epoch": 0.3435209388195295, "grad_norm": 5.574070086308546, "learning_rate": 7.636553525910621e-06, "loss": 17.1388, "step": 18793 }, { "epoch": 0.34353921801597603, "grad_norm": 7.062336492708427, "learning_rate": 7.636302008415844e-06, "loss": 17.7763, "step": 18794 }, { "epoch": 0.34355749721242257, "grad_norm": 6.918785307699996, "learning_rate": 7.636050481681055e-06, "loss": 17.5145, "step": 18795 }, { "epoch": 0.34357577640886905, "grad_norm": 6.681621492836319, "learning_rate": 7.63579894570714e-06, "loss": 17.6554, "step": 18796 }, { "epoch": 0.3435940556053156, "grad_norm": 8.249625305925028, "learning_rate": 7.635547400494976e-06, "loss": 18.4605, "step": 18797 }, { "epoch": 0.3436123348017621, "grad_norm": 8.073382903177853, "learning_rate": 7.635295846045447e-06, "loss": 18.4458, "step": 18798 }, { "epoch": 0.34363061399820866, "grad_norm": 6.880827109514999, "learning_rate": 7.635044282359437e-06, "loss": 17.6348, "step": 18799 }, { "epoch": 0.34364889319465514, "grad_norm": 6.763693486197934, "learning_rate": 7.634792709437822e-06, "loss": 17.4167, "step": 18800 }, { "epoch": 0.34366717239110167, "grad_norm": 6.2756764284531386, "learning_rate": 7.63454112728149e-06, "loss": 17.4985, "step": 18801 }, { "epoch": 0.3436854515875482, "grad_norm": 5.653630459512056, "learning_rate": 7.634289535891319e-06, "loss": 17.3694, "step": 18802 }, { "epoch": 0.34370373078399474, "grad_norm": 7.489377884641946, "learning_rate": 7.634037935268191e-06, "loss": 17.7952, "step": 18803 }, { "epoch": 0.3437220099804413, "grad_norm": 5.682485521663398, "learning_rate": 7.63378632541299e-06, "loss": 17.2305, "step": 18804 }, { "epoch": 0.34374028917688776, "grad_norm": 6.994947420661549, "learning_rate": 7.633534706326596e-06, "loss": 17.6213, "step": 18805 }, { "epoch": 0.3437585683733343, "grad_norm": 7.111023801184497, "learning_rate": 7.633283078009892e-06, "loss": 17.9773, "step": 18806 }, { "epoch": 0.34377684756978083, "grad_norm": 9.382723296691937, "learning_rate": 7.633031440463757e-06, "loss": 18.3125, "step": 18807 }, { "epoch": 0.34379512676622737, "grad_norm": 9.081819864001538, "learning_rate": 7.632779793689077e-06, "loss": 18.6075, "step": 18808 }, { "epoch": 0.3438134059626739, "grad_norm": 7.903707217962754, "learning_rate": 7.632528137686732e-06, "loss": 17.8067, "step": 18809 }, { "epoch": 0.3438316851591204, "grad_norm": 6.8143214832862204, "learning_rate": 7.632276472457604e-06, "loss": 17.8137, "step": 18810 }, { "epoch": 0.3438499643555669, "grad_norm": 8.528808663813097, "learning_rate": 7.632024798002577e-06, "loss": 18.0433, "step": 18811 }, { "epoch": 0.34386824355201345, "grad_norm": 5.970033498778536, "learning_rate": 7.631773114322529e-06, "loss": 17.3655, "step": 18812 }, { "epoch": 0.34388652274846, "grad_norm": 7.185904186886038, "learning_rate": 7.631521421418348e-06, "loss": 17.7229, "step": 18813 }, { "epoch": 0.3439048019449065, "grad_norm": 6.002420221406456, "learning_rate": 7.63126971929091e-06, "loss": 17.6412, "step": 18814 }, { "epoch": 0.343923081141353, "grad_norm": 5.6499855524419145, "learning_rate": 7.631018007941101e-06, "loss": 17.1009, "step": 18815 }, { "epoch": 0.34394136033779954, "grad_norm": 5.970341578576657, "learning_rate": 7.6307662873698e-06, "loss": 17.3979, "step": 18816 }, { "epoch": 0.3439596395342461, "grad_norm": 5.279621444832671, "learning_rate": 7.630514557577895e-06, "loss": 17.3045, "step": 18817 }, { "epoch": 0.3439779187306926, "grad_norm": 6.6552676311017365, "learning_rate": 7.630262818566264e-06, "loss": 17.5556, "step": 18818 }, { "epoch": 0.34399619792713915, "grad_norm": 7.680684311602772, "learning_rate": 7.630011070335788e-06, "loss": 18.5134, "step": 18819 }, { "epoch": 0.3440144771235856, "grad_norm": 5.6916536488339355, "learning_rate": 7.629759312887353e-06, "loss": 17.4569, "step": 18820 }, { "epoch": 0.34403275632003216, "grad_norm": 7.07473974257135, "learning_rate": 7.62950754622184e-06, "loss": 18.1428, "step": 18821 }, { "epoch": 0.3440510355164787, "grad_norm": 6.386525174074634, "learning_rate": 7.62925577034013e-06, "loss": 17.6783, "step": 18822 }, { "epoch": 0.34406931471292523, "grad_norm": 7.169004794316508, "learning_rate": 7.629003985243108e-06, "loss": 17.8882, "step": 18823 }, { "epoch": 0.34408759390937177, "grad_norm": 6.911734582211646, "learning_rate": 7.628752190931654e-06, "loss": 18.1529, "step": 18824 }, { "epoch": 0.34410587310581825, "grad_norm": 5.576247646965134, "learning_rate": 7.628500387406652e-06, "loss": 17.0755, "step": 18825 }, { "epoch": 0.3441241523022648, "grad_norm": 7.021326916468734, "learning_rate": 7.6282485746689835e-06, "loss": 17.8135, "step": 18826 }, { "epoch": 0.3441424314987113, "grad_norm": 5.888912413575095, "learning_rate": 7.627996752719533e-06, "loss": 17.1671, "step": 18827 }, { "epoch": 0.34416071069515786, "grad_norm": 7.174716073977307, "learning_rate": 7.627744921559183e-06, "loss": 17.7786, "step": 18828 }, { "epoch": 0.3441789898916044, "grad_norm": 6.440076200999903, "learning_rate": 7.627493081188813e-06, "loss": 17.3864, "step": 18829 }, { "epoch": 0.3441972690880509, "grad_norm": 6.371314035709741, "learning_rate": 7.627241231609308e-06, "loss": 17.3974, "step": 18830 }, { "epoch": 0.3442155482844974, "grad_norm": 7.178583371291788, "learning_rate": 7.626989372821552e-06, "loss": 17.9502, "step": 18831 }, { "epoch": 0.34423382748094394, "grad_norm": 5.43836292811123, "learning_rate": 7.6267375048264245e-06, "loss": 17.1827, "step": 18832 }, { "epoch": 0.3442521066773905, "grad_norm": 6.061526697036359, "learning_rate": 7.62648562762481e-06, "loss": 17.364, "step": 18833 }, { "epoch": 0.34427038587383696, "grad_norm": 7.416400119145123, "learning_rate": 7.626233741217592e-06, "loss": 17.9628, "step": 18834 }, { "epoch": 0.3442886650702835, "grad_norm": 6.155932104665575, "learning_rate": 7.625981845605652e-06, "loss": 17.6379, "step": 18835 }, { "epoch": 0.34430694426673003, "grad_norm": 6.286729678642646, "learning_rate": 7.625729940789875e-06, "loss": 17.0942, "step": 18836 }, { "epoch": 0.34432522346317657, "grad_norm": 5.570072005269807, "learning_rate": 7.625478026771143e-06, "loss": 17.1133, "step": 18837 }, { "epoch": 0.3443435026596231, "grad_norm": 8.294658198108628, "learning_rate": 7.625226103550334e-06, "loss": 18.2501, "step": 18838 }, { "epoch": 0.3443617818560696, "grad_norm": 6.554547636739679, "learning_rate": 7.6249741711283385e-06, "loss": 17.6861, "step": 18839 }, { "epoch": 0.3443800610525161, "grad_norm": 5.81957898550504, "learning_rate": 7.624722229506036e-06, "loss": 17.3066, "step": 18840 }, { "epoch": 0.34439834024896265, "grad_norm": 6.623162130971828, "learning_rate": 7.624470278684311e-06, "loss": 17.4684, "step": 18841 }, { "epoch": 0.3444166194454092, "grad_norm": 7.694048787608055, "learning_rate": 7.624218318664044e-06, "loss": 18.2676, "step": 18842 }, { "epoch": 0.3444348986418557, "grad_norm": 6.292099350719881, "learning_rate": 7.6239663494461195e-06, "loss": 17.409, "step": 18843 }, { "epoch": 0.3444531778383022, "grad_norm": 6.2170305037463836, "learning_rate": 7.623714371031421e-06, "loss": 17.2171, "step": 18844 }, { "epoch": 0.34447145703474874, "grad_norm": 7.276994272151025, "learning_rate": 7.623462383420831e-06, "loss": 17.9421, "step": 18845 }, { "epoch": 0.3444897362311953, "grad_norm": 6.682340604025584, "learning_rate": 7.6232103866152325e-06, "loss": 17.2774, "step": 18846 }, { "epoch": 0.3445080154276418, "grad_norm": 7.655083146438742, "learning_rate": 7.622958380615511e-06, "loss": 18.1831, "step": 18847 }, { "epoch": 0.34452629462408835, "grad_norm": 6.375371933391231, "learning_rate": 7.622706365422545e-06, "loss": 17.7205, "step": 18848 }, { "epoch": 0.3445445738205348, "grad_norm": 7.460218703284634, "learning_rate": 7.622454341037224e-06, "loss": 18.0615, "step": 18849 }, { "epoch": 0.34456285301698136, "grad_norm": 7.557403389145596, "learning_rate": 7.622202307460426e-06, "loss": 17.9129, "step": 18850 }, { "epoch": 0.3445811322134279, "grad_norm": 5.463903761896323, "learning_rate": 7.621950264693039e-06, "loss": 17.2442, "step": 18851 }, { "epoch": 0.34459941140987443, "grad_norm": 5.320197632858207, "learning_rate": 7.62169821273594e-06, "loss": 17.2238, "step": 18852 }, { "epoch": 0.34461769060632097, "grad_norm": 5.893030144033991, "learning_rate": 7.621446151590018e-06, "loss": 17.4173, "step": 18853 }, { "epoch": 0.34463596980276745, "grad_norm": 6.252265876898372, "learning_rate": 7.6211940812561555e-06, "loss": 17.5848, "step": 18854 }, { "epoch": 0.344654248999214, "grad_norm": 7.255444106176171, "learning_rate": 7.620942001735235e-06, "loss": 17.8887, "step": 18855 }, { "epoch": 0.3446725281956605, "grad_norm": 5.76311644733677, "learning_rate": 7.620689913028141e-06, "loss": 17.1523, "step": 18856 }, { "epoch": 0.34469080739210706, "grad_norm": 6.070883116721132, "learning_rate": 7.620437815135753e-06, "loss": 17.1883, "step": 18857 }, { "epoch": 0.3447090865885536, "grad_norm": 6.704219160795553, "learning_rate": 7.620185708058962e-06, "loss": 17.6405, "step": 18858 }, { "epoch": 0.3447273657850001, "grad_norm": 5.39117974837999, "learning_rate": 7.619933591798645e-06, "loss": 17.2863, "step": 18859 }, { "epoch": 0.3447456449814466, "grad_norm": 5.623336980396304, "learning_rate": 7.619681466355688e-06, "loss": 17.4557, "step": 18860 }, { "epoch": 0.34476392417789314, "grad_norm": 6.1063789323148985, "learning_rate": 7.619429331730977e-06, "loss": 17.6039, "step": 18861 }, { "epoch": 0.3447822033743397, "grad_norm": 7.56351926677025, "learning_rate": 7.6191771879253905e-06, "loss": 17.9513, "step": 18862 }, { "epoch": 0.3448004825707862, "grad_norm": 6.106402972925933, "learning_rate": 7.618925034939817e-06, "loss": 17.3865, "step": 18863 }, { "epoch": 0.3448187617672327, "grad_norm": 5.920384771196159, "learning_rate": 7.618672872775138e-06, "loss": 17.3956, "step": 18864 }, { "epoch": 0.34483704096367923, "grad_norm": 6.670774222241027, "learning_rate": 7.618420701432238e-06, "loss": 17.9289, "step": 18865 }, { "epoch": 0.34485532016012577, "grad_norm": 7.524320600353264, "learning_rate": 7.618168520912001e-06, "loss": 18.0164, "step": 18866 }, { "epoch": 0.3448735993565723, "grad_norm": 6.578151056101703, "learning_rate": 7.617916331215309e-06, "loss": 17.4418, "step": 18867 }, { "epoch": 0.3448918785530188, "grad_norm": 6.111321466298664, "learning_rate": 7.61766413234305e-06, "loss": 17.4151, "step": 18868 }, { "epoch": 0.3449101577494653, "grad_norm": 6.647655970034263, "learning_rate": 7.617411924296103e-06, "loss": 17.8468, "step": 18869 }, { "epoch": 0.34492843694591185, "grad_norm": 5.391290134991107, "learning_rate": 7.617159707075355e-06, "loss": 17.1188, "step": 18870 }, { "epoch": 0.3449467161423584, "grad_norm": 5.836604081616722, "learning_rate": 7.61690748068169e-06, "loss": 17.4615, "step": 18871 }, { "epoch": 0.3449649953388049, "grad_norm": 5.828869638207708, "learning_rate": 7.6166552451159914e-06, "loss": 17.298, "step": 18872 }, { "epoch": 0.3449832745352514, "grad_norm": 6.268811339034368, "learning_rate": 7.6164030003791424e-06, "loss": 17.5558, "step": 18873 }, { "epoch": 0.34500155373169794, "grad_norm": 7.453674579298899, "learning_rate": 7.6161507464720285e-06, "loss": 17.7549, "step": 18874 }, { "epoch": 0.3450198329281445, "grad_norm": 6.651998662950934, "learning_rate": 7.615898483395534e-06, "loss": 17.8893, "step": 18875 }, { "epoch": 0.345038112124591, "grad_norm": 6.128656015172041, "learning_rate": 7.61564621115054e-06, "loss": 17.6188, "step": 18876 }, { "epoch": 0.34505639132103755, "grad_norm": 6.3756299741129485, "learning_rate": 7.615393929737935e-06, "loss": 17.6619, "step": 18877 }, { "epoch": 0.34507467051748403, "grad_norm": 5.825326039746997, "learning_rate": 7.615141639158601e-06, "loss": 17.3186, "step": 18878 }, { "epoch": 0.34509294971393056, "grad_norm": 6.891541289228385, "learning_rate": 7.614889339413422e-06, "loss": 17.9987, "step": 18879 }, { "epoch": 0.3451112289103771, "grad_norm": 5.671312721659657, "learning_rate": 7.614637030503282e-06, "loss": 16.9284, "step": 18880 }, { "epoch": 0.34512950810682363, "grad_norm": 6.622144873740716, "learning_rate": 7.614384712429068e-06, "loss": 17.5671, "step": 18881 }, { "epoch": 0.34514778730327017, "grad_norm": 7.310112596708961, "learning_rate": 7.614132385191661e-06, "loss": 17.9785, "step": 18882 }, { "epoch": 0.34516606649971665, "grad_norm": 5.69360839671099, "learning_rate": 7.613880048791948e-06, "loss": 17.1646, "step": 18883 }, { "epoch": 0.3451843456961632, "grad_norm": 6.170253810983289, "learning_rate": 7.61362770323081e-06, "loss": 17.4712, "step": 18884 }, { "epoch": 0.3452026248926097, "grad_norm": 5.50022870992592, "learning_rate": 7.613375348509135e-06, "loss": 17.2666, "step": 18885 }, { "epoch": 0.34522090408905626, "grad_norm": 7.188684396091385, "learning_rate": 7.613122984627808e-06, "loss": 17.6921, "step": 18886 }, { "epoch": 0.3452391832855028, "grad_norm": 7.81632667876437, "learning_rate": 7.61287061158771e-06, "loss": 18.0837, "step": 18887 }, { "epoch": 0.3452574624819493, "grad_norm": 6.656226293490472, "learning_rate": 7.612618229389728e-06, "loss": 17.4341, "step": 18888 }, { "epoch": 0.3452757416783958, "grad_norm": 6.449742042322299, "learning_rate": 7.6123658380347446e-06, "loss": 17.4351, "step": 18889 }, { "epoch": 0.34529402087484234, "grad_norm": 6.712440839298584, "learning_rate": 7.612113437523646e-06, "loss": 17.6861, "step": 18890 }, { "epoch": 0.3453123000712889, "grad_norm": 8.251373663963818, "learning_rate": 7.611861027857317e-06, "loss": 18.1807, "step": 18891 }, { "epoch": 0.3453305792677354, "grad_norm": 6.296892881834908, "learning_rate": 7.61160860903664e-06, "loss": 17.5629, "step": 18892 }, { "epoch": 0.3453488584641819, "grad_norm": 6.264489853355421, "learning_rate": 7.611356181062503e-06, "loss": 17.5003, "step": 18893 }, { "epoch": 0.34536713766062843, "grad_norm": 6.240038453594531, "learning_rate": 7.61110374393579e-06, "loss": 17.3175, "step": 18894 }, { "epoch": 0.34538541685707497, "grad_norm": 6.152512085827735, "learning_rate": 7.610851297657383e-06, "loss": 17.3498, "step": 18895 }, { "epoch": 0.3454036960535215, "grad_norm": 6.005694368651467, "learning_rate": 7.6105988422281715e-06, "loss": 17.0651, "step": 18896 }, { "epoch": 0.34542197524996804, "grad_norm": 5.959610914219258, "learning_rate": 7.610346377649034e-06, "loss": 17.4591, "step": 18897 }, { "epoch": 0.3454402544464145, "grad_norm": 6.33804627651429, "learning_rate": 7.610093903920861e-06, "loss": 17.6828, "step": 18898 }, { "epoch": 0.34545853364286105, "grad_norm": 7.439112451385574, "learning_rate": 7.609841421044537e-06, "loss": 17.9568, "step": 18899 }, { "epoch": 0.3454768128393076, "grad_norm": 7.210065475661961, "learning_rate": 7.609588929020944e-06, "loss": 17.7448, "step": 18900 }, { "epoch": 0.3454950920357541, "grad_norm": 7.720746780965516, "learning_rate": 7.609336427850966e-06, "loss": 17.9802, "step": 18901 }, { "epoch": 0.3455133712322006, "grad_norm": 5.498515695973799, "learning_rate": 7.609083917535491e-06, "loss": 17.3154, "step": 18902 }, { "epoch": 0.34553165042864714, "grad_norm": 5.962879130467125, "learning_rate": 7.608831398075406e-06, "loss": 17.3829, "step": 18903 }, { "epoch": 0.3455499296250937, "grad_norm": 6.846620968994786, "learning_rate": 7.608578869471591e-06, "loss": 17.8339, "step": 18904 }, { "epoch": 0.3455682088215402, "grad_norm": 7.6018391661958145, "learning_rate": 7.6083263317249335e-06, "loss": 18.1102, "step": 18905 }, { "epoch": 0.34558648801798675, "grad_norm": 5.784234635221302, "learning_rate": 7.6080737848363195e-06, "loss": 17.2917, "step": 18906 }, { "epoch": 0.34560476721443323, "grad_norm": 5.752357111200658, "learning_rate": 7.6078212288066335e-06, "loss": 17.2057, "step": 18907 }, { "epoch": 0.34562304641087976, "grad_norm": 6.459778009405557, "learning_rate": 7.60756866363676e-06, "loss": 17.1771, "step": 18908 }, { "epoch": 0.3456413256073263, "grad_norm": 7.0444922758887865, "learning_rate": 7.6073160893275834e-06, "loss": 17.8434, "step": 18909 }, { "epoch": 0.34565960480377284, "grad_norm": 6.265177353041961, "learning_rate": 7.6070635058799915e-06, "loss": 17.1887, "step": 18910 }, { "epoch": 0.34567788400021937, "grad_norm": 7.053831506175236, "learning_rate": 7.606810913294866e-06, "loss": 17.7498, "step": 18911 }, { "epoch": 0.34569616319666585, "grad_norm": 5.4893544285976725, "learning_rate": 7.606558311573097e-06, "loss": 17.1869, "step": 18912 }, { "epoch": 0.3457144423931124, "grad_norm": 6.470420202776055, "learning_rate": 7.606305700715567e-06, "loss": 17.6234, "step": 18913 }, { "epoch": 0.3457327215895589, "grad_norm": 5.8681958086487915, "learning_rate": 7.606053080723161e-06, "loss": 17.1226, "step": 18914 }, { "epoch": 0.34575100078600546, "grad_norm": 7.992891775574533, "learning_rate": 7.605800451596765e-06, "loss": 17.9576, "step": 18915 }, { "epoch": 0.345769279982452, "grad_norm": 5.892657778749329, "learning_rate": 7.605547813337264e-06, "loss": 17.2621, "step": 18916 }, { "epoch": 0.3457875591788985, "grad_norm": 5.501337991493749, "learning_rate": 7.605295165945546e-06, "loss": 17.1447, "step": 18917 }, { "epoch": 0.345805838375345, "grad_norm": 7.913320990572355, "learning_rate": 7.605042509422493e-06, "loss": 18.0964, "step": 18918 }, { "epoch": 0.34582411757179154, "grad_norm": 6.4001766127926905, "learning_rate": 7.604789843768993e-06, "loss": 17.3341, "step": 18919 }, { "epoch": 0.3458423967682381, "grad_norm": 7.205033271136617, "learning_rate": 7.60453716898593e-06, "loss": 17.9212, "step": 18920 }, { "epoch": 0.3458606759646846, "grad_norm": 5.8284231389009395, "learning_rate": 7.6042844850741905e-06, "loss": 17.1386, "step": 18921 }, { "epoch": 0.3458789551611311, "grad_norm": 7.97500368082429, "learning_rate": 7.604031792034659e-06, "loss": 18.2105, "step": 18922 }, { "epoch": 0.34589723435757763, "grad_norm": 6.882976466923342, "learning_rate": 7.603779089868224e-06, "loss": 17.7936, "step": 18923 }, { "epoch": 0.34591551355402417, "grad_norm": 6.071329614133939, "learning_rate": 7.603526378575767e-06, "loss": 17.3471, "step": 18924 }, { "epoch": 0.3459337927504707, "grad_norm": 7.596351184938982, "learning_rate": 7.603273658158177e-06, "loss": 18.3059, "step": 18925 }, { "epoch": 0.34595207194691724, "grad_norm": 7.818145529326444, "learning_rate": 7.603020928616338e-06, "loss": 17.9933, "step": 18926 }, { "epoch": 0.3459703511433637, "grad_norm": 5.9681859650846, "learning_rate": 7.602768189951138e-06, "loss": 17.3911, "step": 18927 }, { "epoch": 0.34598863033981025, "grad_norm": 7.141417352971058, "learning_rate": 7.602515442163461e-06, "loss": 17.5778, "step": 18928 }, { "epoch": 0.3460069095362568, "grad_norm": 5.776967542647376, "learning_rate": 7.602262685254192e-06, "loss": 17.2206, "step": 18929 }, { "epoch": 0.3460251887327033, "grad_norm": 7.337196629646757, "learning_rate": 7.602009919224219e-06, "loss": 18.1074, "step": 18930 }, { "epoch": 0.34604346792914986, "grad_norm": 6.295462201743125, "learning_rate": 7.6017571440744264e-06, "loss": 17.4926, "step": 18931 }, { "epoch": 0.34606174712559634, "grad_norm": 6.889041399251884, "learning_rate": 7.601504359805702e-06, "loss": 17.6455, "step": 18932 }, { "epoch": 0.3460800263220429, "grad_norm": 8.463787252771924, "learning_rate": 7.60125156641893e-06, "loss": 18.1351, "step": 18933 }, { "epoch": 0.3460983055184894, "grad_norm": 6.931909202236026, "learning_rate": 7.600998763914996e-06, "loss": 17.6481, "step": 18934 }, { "epoch": 0.34611658471493595, "grad_norm": 6.67161552163744, "learning_rate": 7.6007459522947875e-06, "loss": 17.763, "step": 18935 }, { "epoch": 0.34613486391138243, "grad_norm": 6.180953002498059, "learning_rate": 7.60049313155919e-06, "loss": 17.3172, "step": 18936 }, { "epoch": 0.34615314310782896, "grad_norm": 5.520944988557824, "learning_rate": 7.600240301709092e-06, "loss": 17.0942, "step": 18937 }, { "epoch": 0.3461714223042755, "grad_norm": 5.072948752035825, "learning_rate": 7.599987462745375e-06, "loss": 16.8876, "step": 18938 }, { "epoch": 0.34618970150072204, "grad_norm": 8.091342686028737, "learning_rate": 7.599734614668928e-06, "loss": 17.9227, "step": 18939 }, { "epoch": 0.34620798069716857, "grad_norm": 5.525980838938255, "learning_rate": 7.599481757480636e-06, "loss": 17.1579, "step": 18940 }, { "epoch": 0.34622625989361505, "grad_norm": 6.154743454988897, "learning_rate": 7.599228891181389e-06, "loss": 17.4544, "step": 18941 }, { "epoch": 0.3462445390900616, "grad_norm": 7.132020764960109, "learning_rate": 7.5989760157720675e-06, "loss": 17.8731, "step": 18942 }, { "epoch": 0.3462628182865081, "grad_norm": 5.503747199022232, "learning_rate": 7.59872313125356e-06, "loss": 17.073, "step": 18943 }, { "epoch": 0.34628109748295466, "grad_norm": 6.0161469480476795, "learning_rate": 7.598470237626756e-06, "loss": 17.2366, "step": 18944 }, { "epoch": 0.3462993766794012, "grad_norm": 6.026055133355524, "learning_rate": 7.598217334892537e-06, "loss": 17.3908, "step": 18945 }, { "epoch": 0.3463176558758477, "grad_norm": 6.8960372076668035, "learning_rate": 7.597964423051794e-06, "loss": 17.6786, "step": 18946 }, { "epoch": 0.3463359350722942, "grad_norm": 7.113678456554318, "learning_rate": 7.5977115021054096e-06, "loss": 18.041, "step": 18947 }, { "epoch": 0.34635421426874075, "grad_norm": 6.122004662433077, "learning_rate": 7.597458572054272e-06, "loss": 17.2704, "step": 18948 }, { "epoch": 0.3463724934651873, "grad_norm": 8.464978046311984, "learning_rate": 7.597205632899267e-06, "loss": 18.6798, "step": 18949 }, { "epoch": 0.3463907726616338, "grad_norm": 6.166113285087843, "learning_rate": 7.596952684641282e-06, "loss": 17.5786, "step": 18950 }, { "epoch": 0.3464090518580803, "grad_norm": 8.464100664878849, "learning_rate": 7.596699727281205e-06, "loss": 18.4814, "step": 18951 }, { "epoch": 0.34642733105452683, "grad_norm": 6.51298013944387, "learning_rate": 7.596446760819918e-06, "loss": 17.5248, "step": 18952 }, { "epoch": 0.34644561025097337, "grad_norm": 8.071762299248826, "learning_rate": 7.596193785258311e-06, "loss": 18.2478, "step": 18953 }, { "epoch": 0.3464638894474199, "grad_norm": 6.861047489719463, "learning_rate": 7.59594080059727e-06, "loss": 17.6007, "step": 18954 }, { "epoch": 0.34648216864386644, "grad_norm": 7.388227593370416, "learning_rate": 7.595687806837683e-06, "loss": 17.9192, "step": 18955 }, { "epoch": 0.3465004478403129, "grad_norm": 6.022438807693688, "learning_rate": 7.595434803980436e-06, "loss": 17.35, "step": 18956 }, { "epoch": 0.34651872703675946, "grad_norm": 6.939677496048242, "learning_rate": 7.595181792026414e-06, "loss": 17.8307, "step": 18957 }, { "epoch": 0.346537006233206, "grad_norm": 6.9813038396306135, "learning_rate": 7.594928770976505e-06, "loss": 17.8424, "step": 18958 }, { "epoch": 0.3465552854296525, "grad_norm": 5.442763769373437, "learning_rate": 7.594675740831597e-06, "loss": 17.1987, "step": 18959 }, { "epoch": 0.34657356462609906, "grad_norm": 6.688560567220221, "learning_rate": 7.594422701592574e-06, "loss": 17.663, "step": 18960 }, { "epoch": 0.34659184382254554, "grad_norm": 7.418697154261923, "learning_rate": 7.5941696532603246e-06, "loss": 17.8478, "step": 18961 }, { "epoch": 0.3466101230189921, "grad_norm": 6.555275826485624, "learning_rate": 7.5939165958357365e-06, "loss": 17.7269, "step": 18962 }, { "epoch": 0.3466284022154386, "grad_norm": 6.872575812728452, "learning_rate": 7.593663529319695e-06, "loss": 17.8606, "step": 18963 }, { "epoch": 0.34664668141188515, "grad_norm": 6.927386186000394, "learning_rate": 7.593410453713088e-06, "loss": 17.8418, "step": 18964 }, { "epoch": 0.3466649606083317, "grad_norm": 6.69084749891412, "learning_rate": 7.593157369016804e-06, "loss": 17.454, "step": 18965 }, { "epoch": 0.34668323980477816, "grad_norm": 6.755837995512869, "learning_rate": 7.592904275231727e-06, "loss": 17.8124, "step": 18966 }, { "epoch": 0.3467015190012247, "grad_norm": 6.100701960625412, "learning_rate": 7.592651172358746e-06, "loss": 17.5114, "step": 18967 }, { "epoch": 0.34671979819767124, "grad_norm": 6.9029266075070135, "learning_rate": 7.592398060398749e-06, "loss": 17.7986, "step": 18968 }, { "epoch": 0.34673807739411777, "grad_norm": 6.1624585317135745, "learning_rate": 7.592144939352619e-06, "loss": 17.2991, "step": 18969 }, { "epoch": 0.34675635659056425, "grad_norm": 5.21458580299733, "learning_rate": 7.591891809221247e-06, "loss": 17.201, "step": 18970 }, { "epoch": 0.3467746357870108, "grad_norm": 7.3290017937401455, "learning_rate": 7.591638670005519e-06, "loss": 18.0177, "step": 18971 }, { "epoch": 0.3467929149834573, "grad_norm": 5.548219698809922, "learning_rate": 7.591385521706324e-06, "loss": 17.2977, "step": 18972 }, { "epoch": 0.34681119417990386, "grad_norm": 6.618569731965456, "learning_rate": 7.591132364324545e-06, "loss": 17.7773, "step": 18973 }, { "epoch": 0.3468294733763504, "grad_norm": 5.670441696830813, "learning_rate": 7.5908791978610745e-06, "loss": 17.1742, "step": 18974 }, { "epoch": 0.3468477525727969, "grad_norm": 6.266394248876967, "learning_rate": 7.590626022316796e-06, "loss": 17.4635, "step": 18975 }, { "epoch": 0.3468660317692434, "grad_norm": 7.1009250742474554, "learning_rate": 7.590372837692597e-06, "loss": 17.8548, "step": 18976 }, { "epoch": 0.34688431096568995, "grad_norm": 8.017534823453387, "learning_rate": 7.5901196439893664e-06, "loss": 18.4484, "step": 18977 }, { "epoch": 0.3469025901621365, "grad_norm": 9.423774676795624, "learning_rate": 7.589866441207993e-06, "loss": 18.6049, "step": 18978 }, { "epoch": 0.346920869358583, "grad_norm": 5.964929523147536, "learning_rate": 7.589613229349361e-06, "loss": 17.4383, "step": 18979 }, { "epoch": 0.3469391485550295, "grad_norm": 6.44123440510207, "learning_rate": 7.5893600084143595e-06, "loss": 17.6667, "step": 18980 }, { "epoch": 0.34695742775147603, "grad_norm": 6.673572921483574, "learning_rate": 7.589106778403876e-06, "loss": 17.7146, "step": 18981 }, { "epoch": 0.34697570694792257, "grad_norm": 5.71994061700808, "learning_rate": 7.588853539318799e-06, "loss": 17.4912, "step": 18982 }, { "epoch": 0.3469939861443691, "grad_norm": 6.24984709488286, "learning_rate": 7.588600291160013e-06, "loss": 17.6651, "step": 18983 }, { "epoch": 0.34701226534081564, "grad_norm": 6.899227373441953, "learning_rate": 7.588347033928408e-06, "loss": 18.0038, "step": 18984 }, { "epoch": 0.3470305445372621, "grad_norm": 7.093099139063351, "learning_rate": 7.588093767624871e-06, "loss": 17.9797, "step": 18985 }, { "epoch": 0.34704882373370866, "grad_norm": 6.660768228404228, "learning_rate": 7.587840492250293e-06, "loss": 17.3602, "step": 18986 }, { "epoch": 0.3470671029301552, "grad_norm": 6.04878390469459, "learning_rate": 7.587587207805555e-06, "loss": 17.2671, "step": 18987 }, { "epoch": 0.3470853821266017, "grad_norm": 8.783789370315844, "learning_rate": 7.587333914291551e-06, "loss": 19.0311, "step": 18988 }, { "epoch": 0.34710366132304826, "grad_norm": 6.75572416519923, "learning_rate": 7.587080611709163e-06, "loss": 18.0498, "step": 18989 }, { "epoch": 0.34712194051949474, "grad_norm": 6.152852386853728, "learning_rate": 7.586827300059285e-06, "loss": 17.3344, "step": 18990 }, { "epoch": 0.3471402197159413, "grad_norm": 8.027940366040578, "learning_rate": 7.586573979342799e-06, "loss": 18.1787, "step": 18991 }, { "epoch": 0.3471584989123878, "grad_norm": 7.033464796487611, "learning_rate": 7.586320649560599e-06, "loss": 17.2953, "step": 18992 }, { "epoch": 0.34717677810883435, "grad_norm": 6.151192848005572, "learning_rate": 7.586067310713567e-06, "loss": 17.5295, "step": 18993 }, { "epoch": 0.3471950573052809, "grad_norm": 6.775272982301498, "learning_rate": 7.585813962802595e-06, "loss": 17.9858, "step": 18994 }, { "epoch": 0.34721333650172737, "grad_norm": 7.107263174528969, "learning_rate": 7.585560605828568e-06, "loss": 17.8125, "step": 18995 }, { "epoch": 0.3472316156981739, "grad_norm": 8.009063603799497, "learning_rate": 7.585307239792377e-06, "loss": 18.1853, "step": 18996 }, { "epoch": 0.34724989489462044, "grad_norm": 5.769403736100324, "learning_rate": 7.585053864694907e-06, "loss": 17.463, "step": 18997 }, { "epoch": 0.347268174091067, "grad_norm": 5.6762921224754175, "learning_rate": 7.5848004805370475e-06, "loss": 17.2014, "step": 18998 }, { "epoch": 0.3472864532875135, "grad_norm": 7.577787481676225, "learning_rate": 7.584547087319689e-06, "loss": 18.1574, "step": 18999 }, { "epoch": 0.34730473248396, "grad_norm": 5.820216814608337, "learning_rate": 7.584293685043716e-06, "loss": 17.3679, "step": 19000 }, { "epoch": 0.3473230116804065, "grad_norm": 6.389470657618189, "learning_rate": 7.584040273710016e-06, "loss": 17.6025, "step": 19001 }, { "epoch": 0.34734129087685306, "grad_norm": 6.825169449048844, "learning_rate": 7.583786853319479e-06, "loss": 17.7989, "step": 19002 }, { "epoch": 0.3473595700732996, "grad_norm": 5.567763698087035, "learning_rate": 7.583533423872997e-06, "loss": 17.2088, "step": 19003 }, { "epoch": 0.3473778492697461, "grad_norm": 6.1100989530468, "learning_rate": 7.583279985371452e-06, "loss": 17.6809, "step": 19004 }, { "epoch": 0.3473961284661926, "grad_norm": 7.535361986395495, "learning_rate": 7.583026537815734e-06, "loss": 17.9317, "step": 19005 }, { "epoch": 0.34741440766263915, "grad_norm": 7.958814589469808, "learning_rate": 7.582773081206733e-06, "loss": 18.0453, "step": 19006 }, { "epoch": 0.3474326868590857, "grad_norm": 7.2911889967958015, "learning_rate": 7.582519615545339e-06, "loss": 18.1261, "step": 19007 }, { "epoch": 0.3474509660555322, "grad_norm": 8.145035509363272, "learning_rate": 7.582266140832435e-06, "loss": 18.7788, "step": 19008 }, { "epoch": 0.3474692452519787, "grad_norm": 7.024365272537474, "learning_rate": 7.582012657068912e-06, "loss": 17.6573, "step": 19009 }, { "epoch": 0.34748752444842523, "grad_norm": 5.462961894964153, "learning_rate": 7.58175916425566e-06, "loss": 17.2726, "step": 19010 }, { "epoch": 0.34750580364487177, "grad_norm": 7.841845760232381, "learning_rate": 7.581505662393564e-06, "loss": 18.0179, "step": 19011 }, { "epoch": 0.3475240828413183, "grad_norm": 5.8154820249981745, "learning_rate": 7.581252151483518e-06, "loss": 17.4179, "step": 19012 }, { "epoch": 0.34754236203776484, "grad_norm": 5.71731248489601, "learning_rate": 7.580998631526406e-06, "loss": 17.18, "step": 19013 }, { "epoch": 0.3475606412342113, "grad_norm": 6.907962212338977, "learning_rate": 7.580745102523117e-06, "loss": 17.7363, "step": 19014 }, { "epoch": 0.34757892043065786, "grad_norm": 8.04364219464597, "learning_rate": 7.580491564474542e-06, "loss": 18.3559, "step": 19015 }, { "epoch": 0.3475971996271044, "grad_norm": 7.898696894901157, "learning_rate": 7.580238017381565e-06, "loss": 18.4485, "step": 19016 }, { "epoch": 0.3476154788235509, "grad_norm": 7.3644667197822535, "learning_rate": 7.57998446124508e-06, "loss": 17.9994, "step": 19017 }, { "epoch": 0.34763375801999746, "grad_norm": 7.964761199509911, "learning_rate": 7.579730896065974e-06, "loss": 18.6357, "step": 19018 }, { "epoch": 0.34765203721644394, "grad_norm": 7.69745785077081, "learning_rate": 7.5794773218451324e-06, "loss": 17.9703, "step": 19019 }, { "epoch": 0.3476703164128905, "grad_norm": 5.963685721172786, "learning_rate": 7.579223738583448e-06, "loss": 17.3082, "step": 19020 }, { "epoch": 0.347688595609337, "grad_norm": 5.4843169921823245, "learning_rate": 7.5789701462818075e-06, "loss": 17.3481, "step": 19021 }, { "epoch": 0.34770687480578355, "grad_norm": 7.035984081381331, "learning_rate": 7.578716544941102e-06, "loss": 17.8914, "step": 19022 }, { "epoch": 0.3477251540022301, "grad_norm": 8.248334072918139, "learning_rate": 7.578462934562217e-06, "loss": 17.92, "step": 19023 }, { "epoch": 0.34774343319867657, "grad_norm": 6.501565335749314, "learning_rate": 7.5782093151460435e-06, "loss": 17.3995, "step": 19024 }, { "epoch": 0.3477617123951231, "grad_norm": 7.022827693322409, "learning_rate": 7.5779556866934715e-06, "loss": 18.0678, "step": 19025 }, { "epoch": 0.34777999159156964, "grad_norm": 8.056792785808573, "learning_rate": 7.577702049205387e-06, "loss": 18.0544, "step": 19026 }, { "epoch": 0.3477982707880162, "grad_norm": 5.813902440435188, "learning_rate": 7.577448402682682e-06, "loss": 17.2232, "step": 19027 }, { "epoch": 0.3478165499844627, "grad_norm": 6.1137023407954185, "learning_rate": 7.577194747126244e-06, "loss": 17.3333, "step": 19028 }, { "epoch": 0.3478348291809092, "grad_norm": 5.989409712986414, "learning_rate": 7.57694108253696e-06, "loss": 17.2316, "step": 19029 }, { "epoch": 0.3478531083773557, "grad_norm": 5.130493179645548, "learning_rate": 7.576687408915723e-06, "loss": 16.9334, "step": 19030 }, { "epoch": 0.34787138757380226, "grad_norm": 6.070497004139617, "learning_rate": 7.576433726263419e-06, "loss": 17.4544, "step": 19031 }, { "epoch": 0.3478896667702488, "grad_norm": 6.206023807432817, "learning_rate": 7.57618003458094e-06, "loss": 17.7142, "step": 19032 }, { "epoch": 0.34790794596669533, "grad_norm": 6.3894035943645315, "learning_rate": 7.575926333869172e-06, "loss": 17.2279, "step": 19033 }, { "epoch": 0.3479262251631418, "grad_norm": 4.903387349998527, "learning_rate": 7.575672624129006e-06, "loss": 16.7663, "step": 19034 }, { "epoch": 0.34794450435958835, "grad_norm": 6.085588517306378, "learning_rate": 7.575418905361331e-06, "loss": 17.1808, "step": 19035 }, { "epoch": 0.3479627835560349, "grad_norm": 8.329898932990947, "learning_rate": 7.575165177567036e-06, "loss": 17.7279, "step": 19036 }, { "epoch": 0.3479810627524814, "grad_norm": 5.714696455360687, "learning_rate": 7.574911440747013e-06, "loss": 17.2421, "step": 19037 }, { "epoch": 0.3479993419489279, "grad_norm": 7.4415930191479, "learning_rate": 7.574657694902146e-06, "loss": 18.241, "step": 19038 }, { "epoch": 0.34801762114537443, "grad_norm": 7.682200406043389, "learning_rate": 7.574403940033326e-06, "loss": 17.6827, "step": 19039 }, { "epoch": 0.34803590034182097, "grad_norm": 7.373737036423542, "learning_rate": 7.574150176141446e-06, "loss": 18.2338, "step": 19040 }, { "epoch": 0.3480541795382675, "grad_norm": 6.582039258523045, "learning_rate": 7.573896403227395e-06, "loss": 17.7226, "step": 19041 }, { "epoch": 0.34807245873471404, "grad_norm": 5.455502816831152, "learning_rate": 7.573642621292056e-06, "loss": 17.3168, "step": 19042 }, { "epoch": 0.3480907379311605, "grad_norm": 10.111280711634576, "learning_rate": 7.5733888303363235e-06, "loss": 18.9197, "step": 19043 }, { "epoch": 0.34810901712760706, "grad_norm": 7.355693970955116, "learning_rate": 7.57313503036109e-06, "loss": 17.8706, "step": 19044 }, { "epoch": 0.3481272963240536, "grad_norm": 5.411445701675955, "learning_rate": 7.572881221367239e-06, "loss": 17.137, "step": 19045 }, { "epoch": 0.34814557552050013, "grad_norm": 7.883532814525666, "learning_rate": 7.572627403355664e-06, "loss": 18.134, "step": 19046 }, { "epoch": 0.34816385471694666, "grad_norm": 6.0318294601231495, "learning_rate": 7.572373576327251e-06, "loss": 17.2574, "step": 19047 }, { "epoch": 0.34818213391339314, "grad_norm": 7.86051881596752, "learning_rate": 7.572119740282893e-06, "loss": 18.2593, "step": 19048 }, { "epoch": 0.3482004131098397, "grad_norm": 5.075490205629728, "learning_rate": 7.571865895223478e-06, "loss": 16.8729, "step": 19049 }, { "epoch": 0.3482186923062862, "grad_norm": 8.147328538856263, "learning_rate": 7.5716120411498965e-06, "loss": 18.4634, "step": 19050 }, { "epoch": 0.34823697150273275, "grad_norm": 5.504469117713465, "learning_rate": 7.571358178063039e-06, "loss": 17.1509, "step": 19051 }, { "epoch": 0.3482552506991793, "grad_norm": 6.705735662361387, "learning_rate": 7.571104305963792e-06, "loss": 17.7934, "step": 19052 }, { "epoch": 0.34827352989562577, "grad_norm": 5.3881058828175705, "learning_rate": 7.570850424853049e-06, "loss": 16.9258, "step": 19053 }, { "epoch": 0.3482918090920723, "grad_norm": 6.8951777748095635, "learning_rate": 7.570596534731699e-06, "loss": 17.8119, "step": 19054 }, { "epoch": 0.34831008828851884, "grad_norm": 5.937914246025563, "learning_rate": 7.570342635600632e-06, "loss": 17.2293, "step": 19055 }, { "epoch": 0.3483283674849654, "grad_norm": 6.921626001753688, "learning_rate": 7.570088727460735e-06, "loss": 17.7745, "step": 19056 }, { "epoch": 0.3483466466814119, "grad_norm": 8.85207934715673, "learning_rate": 7.5698348103129e-06, "loss": 18.2198, "step": 19057 }, { "epoch": 0.3483649258778584, "grad_norm": 5.969737430063275, "learning_rate": 7.569580884158017e-06, "loss": 17.1383, "step": 19058 }, { "epoch": 0.3483832050743049, "grad_norm": 7.2601443315016585, "learning_rate": 7.569326948996978e-06, "loss": 17.7702, "step": 19059 }, { "epoch": 0.34840148427075146, "grad_norm": 7.75588757033667, "learning_rate": 7.569073004830669e-06, "loss": 17.9613, "step": 19060 }, { "epoch": 0.348419763467198, "grad_norm": 5.469352595144674, "learning_rate": 7.568819051659983e-06, "loss": 17.4016, "step": 19061 }, { "epoch": 0.34843804266364453, "grad_norm": 6.614436525311249, "learning_rate": 7.568565089485809e-06, "loss": 17.7536, "step": 19062 }, { "epoch": 0.348456321860091, "grad_norm": 5.400787195704766, "learning_rate": 7.568311118309035e-06, "loss": 17.1057, "step": 19063 }, { "epoch": 0.34847460105653755, "grad_norm": 6.276006433668197, "learning_rate": 7.5680571381305555e-06, "loss": 17.4418, "step": 19064 }, { "epoch": 0.3484928802529841, "grad_norm": 5.90012870807546, "learning_rate": 7.567803148951259e-06, "loss": 17.2305, "step": 19065 }, { "epoch": 0.3485111594494306, "grad_norm": 9.806247923713876, "learning_rate": 7.5675491507720355e-06, "loss": 18.9236, "step": 19066 }, { "epoch": 0.34852943864587715, "grad_norm": 5.767030138984717, "learning_rate": 7.567295143593774e-06, "loss": 17.2684, "step": 19067 }, { "epoch": 0.34854771784232363, "grad_norm": 7.542900090242737, "learning_rate": 7.567041127417367e-06, "loss": 18.4578, "step": 19068 }, { "epoch": 0.34856599703877017, "grad_norm": 5.341792812164293, "learning_rate": 7.5667871022437025e-06, "loss": 17.1259, "step": 19069 }, { "epoch": 0.3485842762352167, "grad_norm": 7.385116990771949, "learning_rate": 7.566533068073671e-06, "loss": 18.0127, "step": 19070 }, { "epoch": 0.34860255543166324, "grad_norm": 7.847511809859978, "learning_rate": 7.566279024908165e-06, "loss": 18.119, "step": 19071 }, { "epoch": 0.3486208346281097, "grad_norm": 6.816524743102691, "learning_rate": 7.566024972748074e-06, "loss": 17.5523, "step": 19072 }, { "epoch": 0.34863911382455626, "grad_norm": 7.3187033101902355, "learning_rate": 7.565770911594288e-06, "loss": 18.1721, "step": 19073 }, { "epoch": 0.3486573930210028, "grad_norm": 7.181470743501008, "learning_rate": 7.565516841447698e-06, "loss": 17.9119, "step": 19074 }, { "epoch": 0.34867567221744933, "grad_norm": 8.020648660161886, "learning_rate": 7.565262762309194e-06, "loss": 18.5753, "step": 19075 }, { "epoch": 0.34869395141389586, "grad_norm": 7.215914984143746, "learning_rate": 7.565008674179666e-06, "loss": 17.8941, "step": 19076 }, { "epoch": 0.34871223061034234, "grad_norm": 6.445584663850671, "learning_rate": 7.564754577060005e-06, "loss": 17.6232, "step": 19077 }, { "epoch": 0.3487305098067889, "grad_norm": 7.515743718445765, "learning_rate": 7.5645004709511015e-06, "loss": 17.7808, "step": 19078 }, { "epoch": 0.3487487890032354, "grad_norm": 7.9065855217742165, "learning_rate": 7.564246355853846e-06, "loss": 17.7412, "step": 19079 }, { "epoch": 0.34876706819968195, "grad_norm": 7.175318809610735, "learning_rate": 7.563992231769131e-06, "loss": 18.122, "step": 19080 }, { "epoch": 0.3487853473961285, "grad_norm": 6.167743180577843, "learning_rate": 7.563738098697846e-06, "loss": 17.301, "step": 19081 }, { "epoch": 0.34880362659257497, "grad_norm": 7.036348573682763, "learning_rate": 7.56348395664088e-06, "loss": 17.7892, "step": 19082 }, { "epoch": 0.3488219057890215, "grad_norm": 7.608559451051648, "learning_rate": 7.563229805599126e-06, "loss": 18.3402, "step": 19083 }, { "epoch": 0.34884018498546804, "grad_norm": 5.978868292873253, "learning_rate": 7.562975645573473e-06, "loss": 17.487, "step": 19084 }, { "epoch": 0.3488584641819146, "grad_norm": 6.081142939739899, "learning_rate": 7.5627214765648134e-06, "loss": 17.2917, "step": 19085 }, { "epoch": 0.3488767433783611, "grad_norm": 6.354029078628353, "learning_rate": 7.562467298574039e-06, "loss": 17.5563, "step": 19086 }, { "epoch": 0.3488950225748076, "grad_norm": 6.797968928490002, "learning_rate": 7.562213111602037e-06, "loss": 17.316, "step": 19087 }, { "epoch": 0.3489133017712541, "grad_norm": 5.105401632683817, "learning_rate": 7.5619589156497004e-06, "loss": 17.0003, "step": 19088 }, { "epoch": 0.34893158096770066, "grad_norm": 6.572664449949223, "learning_rate": 7.561704710717921e-06, "loss": 17.4657, "step": 19089 }, { "epoch": 0.3489498601641472, "grad_norm": 6.692513585854958, "learning_rate": 7.5614504968075895e-06, "loss": 17.6388, "step": 19090 }, { "epoch": 0.34896813936059373, "grad_norm": 6.308701198746419, "learning_rate": 7.561196273919596e-06, "loss": 17.6996, "step": 19091 }, { "epoch": 0.3489864185570402, "grad_norm": 8.232221482867809, "learning_rate": 7.560942042054831e-06, "loss": 18.2663, "step": 19092 }, { "epoch": 0.34900469775348675, "grad_norm": 5.985624416470622, "learning_rate": 7.560687801214186e-06, "loss": 17.2788, "step": 19093 }, { "epoch": 0.3490229769499333, "grad_norm": 8.288715722382547, "learning_rate": 7.560433551398553e-06, "loss": 18.4856, "step": 19094 }, { "epoch": 0.3490412561463798, "grad_norm": 6.881455031953275, "learning_rate": 7.560179292608823e-06, "loss": 17.5496, "step": 19095 }, { "epoch": 0.34905953534282635, "grad_norm": 6.606205236103944, "learning_rate": 7.5599250248458864e-06, "loss": 17.4679, "step": 19096 }, { "epoch": 0.34907781453927283, "grad_norm": 6.94821201997258, "learning_rate": 7.559670748110634e-06, "loss": 17.885, "step": 19097 }, { "epoch": 0.34909609373571937, "grad_norm": 7.050968024514505, "learning_rate": 7.559416462403959e-06, "loss": 18.0848, "step": 19098 }, { "epoch": 0.3491143729321659, "grad_norm": 6.783862967584317, "learning_rate": 7.559162167726751e-06, "loss": 17.2696, "step": 19099 }, { "epoch": 0.34913265212861244, "grad_norm": 6.884680949415629, "learning_rate": 7.558907864079901e-06, "loss": 17.5352, "step": 19100 }, { "epoch": 0.349150931325059, "grad_norm": 7.496809186050319, "learning_rate": 7.558653551464301e-06, "loss": 18.0601, "step": 19101 }, { "epoch": 0.34916921052150546, "grad_norm": 7.9261805044850115, "learning_rate": 7.558399229880843e-06, "loss": 18.2118, "step": 19102 }, { "epoch": 0.349187489717952, "grad_norm": 8.338104394501705, "learning_rate": 7.558144899330418e-06, "loss": 18.584, "step": 19103 }, { "epoch": 0.34920576891439853, "grad_norm": 6.305445602880384, "learning_rate": 7.557890559813916e-06, "loss": 17.5534, "step": 19104 }, { "epoch": 0.34922404811084506, "grad_norm": 6.472521795684011, "learning_rate": 7.557636211332231e-06, "loss": 17.5505, "step": 19105 }, { "epoch": 0.34924232730729154, "grad_norm": 6.9698569968533866, "learning_rate": 7.557381853886252e-06, "loss": 17.612, "step": 19106 }, { "epoch": 0.3492606065037381, "grad_norm": 6.984919849468305, "learning_rate": 7.557127487476872e-06, "loss": 17.6495, "step": 19107 }, { "epoch": 0.3492788857001846, "grad_norm": 6.338068856962003, "learning_rate": 7.556873112104981e-06, "loss": 17.7788, "step": 19108 }, { "epoch": 0.34929716489663115, "grad_norm": 7.984494388826529, "learning_rate": 7.556618727771473e-06, "loss": 18.0341, "step": 19109 }, { "epoch": 0.3493154440930777, "grad_norm": 5.431516007249899, "learning_rate": 7.5563643344772385e-06, "loss": 17.1211, "step": 19110 }, { "epoch": 0.34933372328952417, "grad_norm": 6.056252371829921, "learning_rate": 7.5561099322231665e-06, "loss": 17.3791, "step": 19111 }, { "epoch": 0.3493520024859707, "grad_norm": 7.158640698573915, "learning_rate": 7.555855521010153e-06, "loss": 17.7733, "step": 19112 }, { "epoch": 0.34937028168241724, "grad_norm": 7.090888065074658, "learning_rate": 7.555601100839087e-06, "loss": 17.8602, "step": 19113 }, { "epoch": 0.3493885608788638, "grad_norm": 6.705794721948667, "learning_rate": 7.555346671710861e-06, "loss": 17.6568, "step": 19114 }, { "epoch": 0.3494068400753103, "grad_norm": 6.4504616451705346, "learning_rate": 7.555092233626367e-06, "loss": 17.4211, "step": 19115 }, { "epoch": 0.3494251192717568, "grad_norm": 7.092298629616406, "learning_rate": 7.5548377865864955e-06, "loss": 17.7163, "step": 19116 }, { "epoch": 0.3494433984682033, "grad_norm": 7.2102206459298746, "learning_rate": 7.554583330592141e-06, "loss": 18.0103, "step": 19117 }, { "epoch": 0.34946167766464986, "grad_norm": 7.2977341125012325, "learning_rate": 7.554328865644193e-06, "loss": 17.9424, "step": 19118 }, { "epoch": 0.3494799568610964, "grad_norm": 6.037901914704069, "learning_rate": 7.5540743917435435e-06, "loss": 17.3669, "step": 19119 }, { "epoch": 0.34949823605754293, "grad_norm": 6.128530930107559, "learning_rate": 7.553819908891085e-06, "loss": 17.3773, "step": 19120 }, { "epoch": 0.3495165152539894, "grad_norm": 7.217781933883795, "learning_rate": 7.55356541708771e-06, "loss": 18.0618, "step": 19121 }, { "epoch": 0.34953479445043595, "grad_norm": 7.532066289037688, "learning_rate": 7.553310916334307e-06, "loss": 17.9471, "step": 19122 }, { "epoch": 0.3495530736468825, "grad_norm": 7.884880601530532, "learning_rate": 7.553056406631773e-06, "loss": 18.0946, "step": 19123 }, { "epoch": 0.349571352843329, "grad_norm": 6.906685803731557, "learning_rate": 7.5528018879809985e-06, "loss": 17.5602, "step": 19124 }, { "epoch": 0.34958963203977556, "grad_norm": 6.31357951210493, "learning_rate": 7.552547360382873e-06, "loss": 17.8895, "step": 19125 }, { "epoch": 0.34960791123622204, "grad_norm": 5.962814846618306, "learning_rate": 7.552292823838292e-06, "loss": 17.2481, "step": 19126 }, { "epoch": 0.34962619043266857, "grad_norm": 5.733791137215473, "learning_rate": 7.552038278348146e-06, "loss": 17.3406, "step": 19127 }, { "epoch": 0.3496444696291151, "grad_norm": 6.127260787737624, "learning_rate": 7.5517837239133275e-06, "loss": 17.4258, "step": 19128 }, { "epoch": 0.34966274882556164, "grad_norm": 5.498357279762782, "learning_rate": 7.551529160534727e-06, "loss": 17.0711, "step": 19129 }, { "epoch": 0.3496810280220082, "grad_norm": 7.139696871622537, "learning_rate": 7.55127458821324e-06, "loss": 17.6185, "step": 19130 }, { "epoch": 0.34969930721845466, "grad_norm": 5.361238809589072, "learning_rate": 7.551020006949756e-06, "loss": 17.1274, "step": 19131 }, { "epoch": 0.3497175864149012, "grad_norm": 5.661615351669901, "learning_rate": 7.5507654167451684e-06, "loss": 17.1587, "step": 19132 }, { "epoch": 0.34973586561134773, "grad_norm": 7.1297421996471515, "learning_rate": 7.550510817600369e-06, "loss": 17.9598, "step": 19133 }, { "epoch": 0.34975414480779426, "grad_norm": 6.251832945188568, "learning_rate": 7.5502562095162516e-06, "loss": 17.431, "step": 19134 }, { "epoch": 0.3497724240042408, "grad_norm": 6.8688043884264065, "learning_rate": 7.550001592493705e-06, "loss": 17.7732, "step": 19135 }, { "epoch": 0.3497907032006873, "grad_norm": 7.889183889188873, "learning_rate": 7.549746966533627e-06, "loss": 17.9789, "step": 19136 }, { "epoch": 0.3498089823971338, "grad_norm": 5.877786857054396, "learning_rate": 7.5494923316369075e-06, "loss": 17.3148, "step": 19137 }, { "epoch": 0.34982726159358035, "grad_norm": 6.4262491059760976, "learning_rate": 7.549237687804436e-06, "loss": 17.6071, "step": 19138 }, { "epoch": 0.3498455407900269, "grad_norm": 8.282359710148388, "learning_rate": 7.548983035037107e-06, "loss": 18.281, "step": 19139 }, { "epoch": 0.34986381998647337, "grad_norm": 7.108549038253616, "learning_rate": 7.548728373335816e-06, "loss": 17.8754, "step": 19140 }, { "epoch": 0.3498820991829199, "grad_norm": 8.155913885820201, "learning_rate": 7.548473702701453e-06, "loss": 17.8797, "step": 19141 }, { "epoch": 0.34990037837936644, "grad_norm": 6.6169587638016925, "learning_rate": 7.54821902313491e-06, "loss": 17.6922, "step": 19142 }, { "epoch": 0.349918657575813, "grad_norm": 7.043672133519509, "learning_rate": 7.547964334637081e-06, "loss": 18.2744, "step": 19143 }, { "epoch": 0.3499369367722595, "grad_norm": 9.829544045235695, "learning_rate": 7.547709637208859e-06, "loss": 17.7204, "step": 19144 }, { "epoch": 0.349955215968706, "grad_norm": 6.093992746556152, "learning_rate": 7.547454930851135e-06, "loss": 17.446, "step": 19145 }, { "epoch": 0.3499734951651525, "grad_norm": 6.498279843625785, "learning_rate": 7.5472002155648015e-06, "loss": 17.5197, "step": 19146 }, { "epoch": 0.34999177436159906, "grad_norm": 6.192123225655105, "learning_rate": 7.5469454913507534e-06, "loss": 17.5905, "step": 19147 }, { "epoch": 0.3500100535580456, "grad_norm": 7.238979418550019, "learning_rate": 7.546690758209883e-06, "loss": 17.7479, "step": 19148 }, { "epoch": 0.35002833275449213, "grad_norm": 7.023267655072233, "learning_rate": 7.5464360161430805e-06, "loss": 17.6625, "step": 19149 }, { "epoch": 0.3500466119509386, "grad_norm": 6.64545949456632, "learning_rate": 7.546181265151241e-06, "loss": 17.5238, "step": 19150 }, { "epoch": 0.35006489114738515, "grad_norm": 7.66921051515604, "learning_rate": 7.545926505235258e-06, "loss": 18.1588, "step": 19151 }, { "epoch": 0.3500831703438317, "grad_norm": 7.1549883339917955, "learning_rate": 7.545671736396023e-06, "loss": 17.8666, "step": 19152 }, { "epoch": 0.3501014495402782, "grad_norm": 7.647805578815517, "learning_rate": 7.545416958634431e-06, "loss": 18.2531, "step": 19153 }, { "epoch": 0.35011972873672476, "grad_norm": 4.906920746033703, "learning_rate": 7.5451621719513725e-06, "loss": 16.9204, "step": 19154 }, { "epoch": 0.35013800793317124, "grad_norm": 6.69855203244943, "learning_rate": 7.544907376347741e-06, "loss": 17.762, "step": 19155 }, { "epoch": 0.35015628712961777, "grad_norm": 6.688795121293713, "learning_rate": 7.544652571824429e-06, "loss": 17.6316, "step": 19156 }, { "epoch": 0.3501745663260643, "grad_norm": 7.152484763408261, "learning_rate": 7.544397758382331e-06, "loss": 17.6217, "step": 19157 }, { "epoch": 0.35019284552251084, "grad_norm": 5.769786048137639, "learning_rate": 7.544142936022342e-06, "loss": 17.3694, "step": 19158 }, { "epoch": 0.3502111247189574, "grad_norm": 5.942327537010234, "learning_rate": 7.543888104745352e-06, "loss": 17.4199, "step": 19159 }, { "epoch": 0.35022940391540386, "grad_norm": 8.713755063742017, "learning_rate": 7.543633264552253e-06, "loss": 18.8002, "step": 19160 }, { "epoch": 0.3502476831118504, "grad_norm": 6.111676468636414, "learning_rate": 7.5433784154439425e-06, "loss": 17.347, "step": 19161 }, { "epoch": 0.35026596230829693, "grad_norm": 5.696435753875319, "learning_rate": 7.5431235574213104e-06, "loss": 17.2735, "step": 19162 }, { "epoch": 0.35028424150474347, "grad_norm": 6.897497797903919, "learning_rate": 7.54286869048525e-06, "loss": 17.7057, "step": 19163 }, { "epoch": 0.35030252070119, "grad_norm": 7.618375172339585, "learning_rate": 7.542613814636655e-06, "loss": 18.0204, "step": 19164 }, { "epoch": 0.3503207998976365, "grad_norm": 6.664557194664785, "learning_rate": 7.542358929876421e-06, "loss": 17.899, "step": 19165 }, { "epoch": 0.350339079094083, "grad_norm": 6.527456188713896, "learning_rate": 7.5421040362054385e-06, "loss": 17.7848, "step": 19166 }, { "epoch": 0.35035735829052955, "grad_norm": 6.481927320564349, "learning_rate": 7.541849133624603e-06, "loss": 17.5153, "step": 19167 }, { "epoch": 0.3503756374869761, "grad_norm": 6.508228343221474, "learning_rate": 7.541594222134807e-06, "loss": 17.4626, "step": 19168 }, { "epoch": 0.3503939166834226, "grad_norm": 6.364981374422483, "learning_rate": 7.541339301736943e-06, "loss": 17.3569, "step": 19169 }, { "epoch": 0.3504121958798691, "grad_norm": 7.083302689962924, "learning_rate": 7.541084372431904e-06, "loss": 17.8332, "step": 19170 }, { "epoch": 0.35043047507631564, "grad_norm": 7.3664542017646895, "learning_rate": 7.5408294342205866e-06, "loss": 17.9209, "step": 19171 }, { "epoch": 0.3504487542727622, "grad_norm": 6.621397043851026, "learning_rate": 7.540574487103882e-06, "loss": 17.7306, "step": 19172 }, { "epoch": 0.3504670334692087, "grad_norm": 6.072460447216706, "learning_rate": 7.540319531082685e-06, "loss": 17.151, "step": 19173 }, { "epoch": 0.3504853126656552, "grad_norm": 6.006454141057217, "learning_rate": 7.540064566157887e-06, "loss": 17.1267, "step": 19174 }, { "epoch": 0.3505035918621017, "grad_norm": 5.747981494818808, "learning_rate": 7.539809592330385e-06, "loss": 17.1149, "step": 19175 }, { "epoch": 0.35052187105854826, "grad_norm": 6.813425080652564, "learning_rate": 7.539554609601069e-06, "loss": 17.6495, "step": 19176 }, { "epoch": 0.3505401502549948, "grad_norm": 7.750637941850223, "learning_rate": 7.539299617970834e-06, "loss": 17.9969, "step": 19177 }, { "epoch": 0.35055842945144133, "grad_norm": 7.154887239422486, "learning_rate": 7.539044617440576e-06, "loss": 17.5975, "step": 19178 }, { "epoch": 0.3505767086478878, "grad_norm": 5.696516250028021, "learning_rate": 7.538789608011185e-06, "loss": 17.2689, "step": 19179 }, { "epoch": 0.35059498784433435, "grad_norm": 6.376758840470761, "learning_rate": 7.53853458968356e-06, "loss": 17.5849, "step": 19180 }, { "epoch": 0.3506132670407809, "grad_norm": 5.269383120275941, "learning_rate": 7.538279562458588e-06, "loss": 17.1573, "step": 19181 }, { "epoch": 0.3506315462372274, "grad_norm": 7.144242085682909, "learning_rate": 7.538024526337169e-06, "loss": 18.1725, "step": 19182 }, { "epoch": 0.35064982543367396, "grad_norm": 6.782469407083449, "learning_rate": 7.537769481320194e-06, "loss": 17.6427, "step": 19183 }, { "epoch": 0.35066810463012044, "grad_norm": 6.493324414981373, "learning_rate": 7.537514427408555e-06, "loss": 17.4828, "step": 19184 }, { "epoch": 0.35068638382656697, "grad_norm": 6.67935934419827, "learning_rate": 7.537259364603149e-06, "loss": 17.6403, "step": 19185 }, { "epoch": 0.3507046630230135, "grad_norm": 6.6564380539045604, "learning_rate": 7.53700429290487e-06, "loss": 17.708, "step": 19186 }, { "epoch": 0.35072294221946004, "grad_norm": 7.4398656587598895, "learning_rate": 7.536749212314611e-06, "loss": 17.5967, "step": 19187 }, { "epoch": 0.3507412214159066, "grad_norm": 7.525312261833701, "learning_rate": 7.5364941228332655e-06, "loss": 17.6407, "step": 19188 }, { "epoch": 0.35075950061235306, "grad_norm": 6.326946912306059, "learning_rate": 7.536239024461729e-06, "loss": 17.5081, "step": 19189 }, { "epoch": 0.3507777798087996, "grad_norm": 6.086980055709366, "learning_rate": 7.535983917200895e-06, "loss": 17.3881, "step": 19190 }, { "epoch": 0.35079605900524613, "grad_norm": 5.687678816216126, "learning_rate": 7.535728801051656e-06, "loss": 17.2807, "step": 19191 }, { "epoch": 0.35081433820169267, "grad_norm": 6.775249668295368, "learning_rate": 7.5354736760149085e-06, "loss": 17.5426, "step": 19192 }, { "epoch": 0.3508326173981392, "grad_norm": 4.997770310487413, "learning_rate": 7.535218542091546e-06, "loss": 16.8495, "step": 19193 }, { "epoch": 0.3508508965945857, "grad_norm": 8.67709245403442, "learning_rate": 7.534963399282462e-06, "loss": 17.9259, "step": 19194 }, { "epoch": 0.3508691757910322, "grad_norm": 7.081679979038991, "learning_rate": 7.5347082475885515e-06, "loss": 17.7819, "step": 19195 }, { "epoch": 0.35088745498747875, "grad_norm": 6.15128860725821, "learning_rate": 7.534453087010709e-06, "loss": 17.4622, "step": 19196 }, { "epoch": 0.3509057341839253, "grad_norm": 9.454014416296994, "learning_rate": 7.534197917549827e-06, "loss": 18.5688, "step": 19197 }, { "epoch": 0.3509240133803718, "grad_norm": 5.653357758714179, "learning_rate": 7.533942739206802e-06, "loss": 17.0181, "step": 19198 }, { "epoch": 0.3509422925768183, "grad_norm": 8.371572714131837, "learning_rate": 7.533687551982529e-06, "loss": 18.4663, "step": 19199 }, { "epoch": 0.35096057177326484, "grad_norm": 5.903768986161092, "learning_rate": 7.533432355877899e-06, "loss": 17.2126, "step": 19200 }, { "epoch": 0.3509788509697114, "grad_norm": 7.8339977387852615, "learning_rate": 7.533177150893809e-06, "loss": 17.8432, "step": 19201 }, { "epoch": 0.3509971301661579, "grad_norm": 5.658744341429674, "learning_rate": 7.532921937031153e-06, "loss": 17.3167, "step": 19202 }, { "epoch": 0.35101540936260445, "grad_norm": 6.03939753372727, "learning_rate": 7.532666714290826e-06, "loss": 16.9948, "step": 19203 }, { "epoch": 0.3510336885590509, "grad_norm": 6.768835174965798, "learning_rate": 7.532411482673721e-06, "loss": 17.5395, "step": 19204 }, { "epoch": 0.35105196775549746, "grad_norm": 5.418941345037018, "learning_rate": 7.532156242180734e-06, "loss": 17.0491, "step": 19205 }, { "epoch": 0.351070246951944, "grad_norm": 6.822958325551641, "learning_rate": 7.531900992812759e-06, "loss": 17.5348, "step": 19206 }, { "epoch": 0.35108852614839053, "grad_norm": 7.108892069534108, "learning_rate": 7.531645734570689e-06, "loss": 17.9473, "step": 19207 }, { "epoch": 0.351106805344837, "grad_norm": 6.974757906404995, "learning_rate": 7.531390467455424e-06, "loss": 18.0174, "step": 19208 }, { "epoch": 0.35112508454128355, "grad_norm": 7.23199396986483, "learning_rate": 7.531135191467852e-06, "loss": 17.9606, "step": 19209 }, { "epoch": 0.3511433637377301, "grad_norm": 6.282276272652066, "learning_rate": 7.530879906608873e-06, "loss": 17.4516, "step": 19210 }, { "epoch": 0.3511616429341766, "grad_norm": 6.7981872820744895, "learning_rate": 7.530624612879378e-06, "loss": 17.8349, "step": 19211 }, { "epoch": 0.35117992213062316, "grad_norm": 7.898551184926161, "learning_rate": 7.5303693102802635e-06, "loss": 17.8937, "step": 19212 }, { "epoch": 0.35119820132706964, "grad_norm": 5.46829284366765, "learning_rate": 7.530113998812425e-06, "loss": 17.1842, "step": 19213 }, { "epoch": 0.3512164805235162, "grad_norm": 6.301265062633721, "learning_rate": 7.529858678476756e-06, "loss": 17.6048, "step": 19214 }, { "epoch": 0.3512347597199627, "grad_norm": 7.420612252023315, "learning_rate": 7.529603349274152e-06, "loss": 17.5682, "step": 19215 }, { "epoch": 0.35125303891640924, "grad_norm": 6.836806153714951, "learning_rate": 7.529348011205506e-06, "loss": 17.6979, "step": 19216 }, { "epoch": 0.3512713181128558, "grad_norm": 7.2037295493918085, "learning_rate": 7.529092664271718e-06, "loss": 17.8771, "step": 19217 }, { "epoch": 0.35128959730930226, "grad_norm": 6.491421191766841, "learning_rate": 7.528837308473678e-06, "loss": 17.7311, "step": 19218 }, { "epoch": 0.3513078765057488, "grad_norm": 6.726707260941049, "learning_rate": 7.5285819438122805e-06, "loss": 17.7142, "step": 19219 }, { "epoch": 0.35132615570219533, "grad_norm": 6.129149885948185, "learning_rate": 7.528326570288427e-06, "loss": 17.4959, "step": 19220 }, { "epoch": 0.35134443489864187, "grad_norm": 5.846540043871445, "learning_rate": 7.5280711879030055e-06, "loss": 17.2388, "step": 19221 }, { "epoch": 0.3513627140950884, "grad_norm": 8.91938466865475, "learning_rate": 7.527815796656914e-06, "loss": 18.7418, "step": 19222 }, { "epoch": 0.3513809932915349, "grad_norm": 8.019711184974327, "learning_rate": 7.527560396551048e-06, "loss": 18.2656, "step": 19223 }, { "epoch": 0.3513992724879814, "grad_norm": 6.768107675353159, "learning_rate": 7.527304987586301e-06, "loss": 17.799, "step": 19224 }, { "epoch": 0.35141755168442795, "grad_norm": 5.558738259085692, "learning_rate": 7.527049569763571e-06, "loss": 17.122, "step": 19225 }, { "epoch": 0.3514358308808745, "grad_norm": 7.115017050290701, "learning_rate": 7.526794143083751e-06, "loss": 17.7497, "step": 19226 }, { "epoch": 0.351454110077321, "grad_norm": 6.686023231461101, "learning_rate": 7.526538707547737e-06, "loss": 17.4914, "step": 19227 }, { "epoch": 0.3514723892737675, "grad_norm": 7.005102727356235, "learning_rate": 7.526283263156424e-06, "loss": 17.7012, "step": 19228 }, { "epoch": 0.35149066847021404, "grad_norm": 6.39245729381603, "learning_rate": 7.526027809910706e-06, "loss": 17.5251, "step": 19229 }, { "epoch": 0.3515089476666606, "grad_norm": 6.653919114430062, "learning_rate": 7.525772347811482e-06, "loss": 17.686, "step": 19230 }, { "epoch": 0.3515272268631071, "grad_norm": 6.929301038300203, "learning_rate": 7.5255168768596444e-06, "loss": 17.8971, "step": 19231 }, { "epoch": 0.35154550605955365, "grad_norm": 5.498475126930915, "learning_rate": 7.52526139705609e-06, "loss": 17.2416, "step": 19232 }, { "epoch": 0.3515637852560001, "grad_norm": 6.282802389198806, "learning_rate": 7.525005908401711e-06, "loss": 17.5791, "step": 19233 }, { "epoch": 0.35158206445244666, "grad_norm": 6.470919523886569, "learning_rate": 7.5247504108974075e-06, "loss": 17.6202, "step": 19234 }, { "epoch": 0.3516003436488932, "grad_norm": 5.620344683425913, "learning_rate": 7.524494904544072e-06, "loss": 17.3058, "step": 19235 }, { "epoch": 0.35161862284533973, "grad_norm": 5.405100648432202, "learning_rate": 7.524239389342601e-06, "loss": 17.2389, "step": 19236 }, { "epoch": 0.35163690204178627, "grad_norm": 6.056074402243854, "learning_rate": 7.523983865293891e-06, "loss": 17.3652, "step": 19237 }, { "epoch": 0.35165518123823275, "grad_norm": 6.845948434940423, "learning_rate": 7.523728332398834e-06, "loss": 17.7525, "step": 19238 }, { "epoch": 0.3516734604346793, "grad_norm": 5.678162363390758, "learning_rate": 7.523472790658331e-06, "loss": 17.2832, "step": 19239 }, { "epoch": 0.3516917396311258, "grad_norm": 6.276029721847072, "learning_rate": 7.523217240073273e-06, "loss": 17.2178, "step": 19240 }, { "epoch": 0.35171001882757236, "grad_norm": 6.771556626423765, "learning_rate": 7.52296168064456e-06, "loss": 17.5713, "step": 19241 }, { "epoch": 0.35172829802401884, "grad_norm": 6.9140068797528595, "learning_rate": 7.522706112373083e-06, "loss": 17.5612, "step": 19242 }, { "epoch": 0.3517465772204654, "grad_norm": 5.268949832379009, "learning_rate": 7.5224505352597395e-06, "loss": 17.1115, "step": 19243 }, { "epoch": 0.3517648564169119, "grad_norm": 8.420649134444037, "learning_rate": 7.522194949305428e-06, "loss": 17.0724, "step": 19244 }, { "epoch": 0.35178313561335844, "grad_norm": 6.6024701703151845, "learning_rate": 7.5219393545110406e-06, "loss": 17.6513, "step": 19245 }, { "epoch": 0.351801414809805, "grad_norm": 5.5397313205401115, "learning_rate": 7.521683750877475e-06, "loss": 17.143, "step": 19246 }, { "epoch": 0.35181969400625146, "grad_norm": 7.0635908472449245, "learning_rate": 7.521428138405626e-06, "loss": 17.8985, "step": 19247 }, { "epoch": 0.351837973202698, "grad_norm": 6.690937779179455, "learning_rate": 7.521172517096391e-06, "loss": 17.5753, "step": 19248 }, { "epoch": 0.35185625239914453, "grad_norm": 5.162150717931989, "learning_rate": 7.520916886950664e-06, "loss": 16.9441, "step": 19249 }, { "epoch": 0.35187453159559107, "grad_norm": 7.542734166513863, "learning_rate": 7.520661247969343e-06, "loss": 17.9402, "step": 19250 }, { "epoch": 0.3518928107920376, "grad_norm": 6.2321172952593455, "learning_rate": 7.520405600153324e-06, "loss": 17.4276, "step": 19251 }, { "epoch": 0.3519110899884841, "grad_norm": 6.189843951485729, "learning_rate": 7.520149943503501e-06, "loss": 17.4252, "step": 19252 }, { "epoch": 0.3519293691849306, "grad_norm": 6.775092305787856, "learning_rate": 7.5198942780207705e-06, "loss": 17.6616, "step": 19253 }, { "epoch": 0.35194764838137715, "grad_norm": 5.865178419230509, "learning_rate": 7.519638603706029e-06, "loss": 17.066, "step": 19254 }, { "epoch": 0.3519659275778237, "grad_norm": 7.121064231289921, "learning_rate": 7.519382920560175e-06, "loss": 17.9656, "step": 19255 }, { "epoch": 0.3519842067742702, "grad_norm": 7.603032854869226, "learning_rate": 7.519127228584101e-06, "loss": 17.9382, "step": 19256 }, { "epoch": 0.3520024859707167, "grad_norm": 6.441960309050005, "learning_rate": 7.5188715277787035e-06, "loss": 18.0542, "step": 19257 }, { "epoch": 0.35202076516716324, "grad_norm": 6.6675001435748245, "learning_rate": 7.518615818144883e-06, "loss": 17.1855, "step": 19258 }, { "epoch": 0.3520390443636098, "grad_norm": 7.671842857573003, "learning_rate": 7.51836009968353e-06, "loss": 17.9289, "step": 19259 }, { "epoch": 0.3520573235600563, "grad_norm": 5.406344839963338, "learning_rate": 7.518104372395545e-06, "loss": 17.2156, "step": 19260 }, { "epoch": 0.35207560275650285, "grad_norm": 7.403033251888348, "learning_rate": 7.5178486362818215e-06, "loss": 18.0308, "step": 19261 }, { "epoch": 0.35209388195294933, "grad_norm": 7.1430028300022945, "learning_rate": 7.517592891343258e-06, "loss": 17.698, "step": 19262 }, { "epoch": 0.35211216114939586, "grad_norm": 9.051370413187286, "learning_rate": 7.517337137580749e-06, "loss": 18.4366, "step": 19263 }, { "epoch": 0.3521304403458424, "grad_norm": 6.552307942762419, "learning_rate": 7.517081374995192e-06, "loss": 17.4133, "step": 19264 }, { "epoch": 0.35214871954228893, "grad_norm": 6.255774953362444, "learning_rate": 7.516825603587483e-06, "loss": 17.5453, "step": 19265 }, { "epoch": 0.35216699873873547, "grad_norm": 6.4441058435343725, "learning_rate": 7.516569823358519e-06, "loss": 17.49, "step": 19266 }, { "epoch": 0.35218527793518195, "grad_norm": 7.242224969692995, "learning_rate": 7.516314034309197e-06, "loss": 18.3363, "step": 19267 }, { "epoch": 0.3522035571316285, "grad_norm": 6.158568205970019, "learning_rate": 7.516058236440412e-06, "loss": 17.394, "step": 19268 }, { "epoch": 0.352221836328075, "grad_norm": 7.229432679066626, "learning_rate": 7.515802429753061e-06, "loss": 17.947, "step": 19269 }, { "epoch": 0.35224011552452156, "grad_norm": 6.939608841008816, "learning_rate": 7.515546614248039e-06, "loss": 17.6991, "step": 19270 }, { "epoch": 0.3522583947209681, "grad_norm": 5.790529395723205, "learning_rate": 7.515290789926248e-06, "loss": 17.3236, "step": 19271 }, { "epoch": 0.3522766739174146, "grad_norm": 5.226285675220542, "learning_rate": 7.515034956788579e-06, "loss": 17.0536, "step": 19272 }, { "epoch": 0.3522949531138611, "grad_norm": 6.510252246174526, "learning_rate": 7.514779114835931e-06, "loss": 17.729, "step": 19273 }, { "epoch": 0.35231323231030764, "grad_norm": 6.164202259826949, "learning_rate": 7.5145232640692e-06, "loss": 17.4468, "step": 19274 }, { "epoch": 0.3523315115067542, "grad_norm": 5.663787925761229, "learning_rate": 7.514267404489284e-06, "loss": 17.3572, "step": 19275 }, { "epoch": 0.35234979070320066, "grad_norm": 6.53771164186853, "learning_rate": 7.514011536097079e-06, "loss": 17.3409, "step": 19276 }, { "epoch": 0.3523680698996472, "grad_norm": 5.923042661465154, "learning_rate": 7.51375565889348e-06, "loss": 17.332, "step": 19277 }, { "epoch": 0.35238634909609373, "grad_norm": 6.503639029233199, "learning_rate": 7.513499772879387e-06, "loss": 17.5271, "step": 19278 }, { "epoch": 0.35240462829254027, "grad_norm": 6.303507979073592, "learning_rate": 7.513243878055696e-06, "loss": 17.5725, "step": 19279 }, { "epoch": 0.3524229074889868, "grad_norm": 6.265970875824671, "learning_rate": 7.512987974423303e-06, "loss": 17.4912, "step": 19280 }, { "epoch": 0.3524411866854333, "grad_norm": 5.95214388881223, "learning_rate": 7.512732061983103e-06, "loss": 17.37, "step": 19281 }, { "epoch": 0.3524594658818798, "grad_norm": 6.7964495082100855, "learning_rate": 7.512476140735998e-06, "loss": 17.4483, "step": 19282 }, { "epoch": 0.35247774507832635, "grad_norm": 5.8637732923041135, "learning_rate": 7.51222021068288e-06, "loss": 17.5728, "step": 19283 }, { "epoch": 0.3524960242747729, "grad_norm": 16.598796165901515, "learning_rate": 7.51196427182465e-06, "loss": 17.1121, "step": 19284 }, { "epoch": 0.3525143034712194, "grad_norm": 20.40456920183395, "learning_rate": 7.5117083241622014e-06, "loss": 17.6317, "step": 19285 }, { "epoch": 0.3525325826676659, "grad_norm": 29.364647554813363, "learning_rate": 7.511452367696434e-06, "loss": 17.4675, "step": 19286 }, { "epoch": 0.35255086186411244, "grad_norm": 5.421552533264433, "learning_rate": 7.511196402428244e-06, "loss": 17.0389, "step": 19287 }, { "epoch": 0.352569141060559, "grad_norm": 6.622874636689023, "learning_rate": 7.510940428358529e-06, "loss": 17.6747, "step": 19288 }, { "epoch": 0.3525874202570055, "grad_norm": 6.4841809629340625, "learning_rate": 7.510684445488186e-06, "loss": 17.6812, "step": 19289 }, { "epoch": 0.35260569945345205, "grad_norm": 6.109972943276097, "learning_rate": 7.51042845381811e-06, "loss": 17.2907, "step": 19290 }, { "epoch": 0.35262397864989853, "grad_norm": 5.171756038966166, "learning_rate": 7.5101724533492025e-06, "loss": 16.9398, "step": 19291 }, { "epoch": 0.35264225784634506, "grad_norm": 6.6507230063221625, "learning_rate": 7.509916444082357e-06, "loss": 17.5161, "step": 19292 }, { "epoch": 0.3526605370427916, "grad_norm": 5.773007882989047, "learning_rate": 7.509660426018473e-06, "loss": 17.1385, "step": 19293 }, { "epoch": 0.35267881623923814, "grad_norm": 8.749319481203136, "learning_rate": 7.509404399158445e-06, "loss": 18.1071, "step": 19294 }, { "epoch": 0.35269709543568467, "grad_norm": 8.197171997068704, "learning_rate": 7.509148363503174e-06, "loss": 18.0026, "step": 19295 }, { "epoch": 0.35271537463213115, "grad_norm": 12.50595201301948, "learning_rate": 7.508892319053555e-06, "loss": 17.443, "step": 19296 }, { "epoch": 0.3527336538285777, "grad_norm": 6.516132376949015, "learning_rate": 7.508636265810486e-06, "loss": 17.3995, "step": 19297 }, { "epoch": 0.3527519330250242, "grad_norm": 6.613174558109078, "learning_rate": 7.508380203774865e-06, "loss": 17.5949, "step": 19298 }, { "epoch": 0.35277021222147076, "grad_norm": 7.868352150229157, "learning_rate": 7.508124132947589e-06, "loss": 17.9619, "step": 19299 }, { "epoch": 0.3527884914179173, "grad_norm": 7.004765055019461, "learning_rate": 7.507868053329557e-06, "loss": 17.6702, "step": 19300 }, { "epoch": 0.3528067706143638, "grad_norm": 5.577969945626082, "learning_rate": 7.507611964921664e-06, "loss": 17.0379, "step": 19301 }, { "epoch": 0.3528250498108103, "grad_norm": 5.206648365877945, "learning_rate": 7.507355867724807e-06, "loss": 16.8953, "step": 19302 }, { "epoch": 0.35284332900725685, "grad_norm": 7.115165103580579, "learning_rate": 7.5070997617398875e-06, "loss": 17.6558, "step": 19303 }, { "epoch": 0.3528616082037034, "grad_norm": 8.738741552797054, "learning_rate": 7.5068436469678e-06, "loss": 17.2167, "step": 19304 }, { "epoch": 0.3528798874001499, "grad_norm": 6.129962169898521, "learning_rate": 7.506587523409443e-06, "loss": 17.1931, "step": 19305 }, { "epoch": 0.3528981665965964, "grad_norm": 7.6024967870376905, "learning_rate": 7.506331391065714e-06, "loss": 17.9997, "step": 19306 }, { "epoch": 0.35291644579304293, "grad_norm": 7.450007025171821, "learning_rate": 7.5060752499375125e-06, "loss": 17.8514, "step": 19307 }, { "epoch": 0.35293472498948947, "grad_norm": 8.420767894245751, "learning_rate": 7.505819100025733e-06, "loss": 18.2729, "step": 19308 }, { "epoch": 0.352953004185936, "grad_norm": 6.280470968026568, "learning_rate": 7.5055629413312745e-06, "loss": 17.4036, "step": 19309 }, { "epoch": 0.3529712833823825, "grad_norm": 7.887634315817519, "learning_rate": 7.505306773855036e-06, "loss": 18.1748, "step": 19310 }, { "epoch": 0.352989562578829, "grad_norm": 6.721258238027715, "learning_rate": 7.505050597597916e-06, "loss": 17.559, "step": 19311 }, { "epoch": 0.35300784177527555, "grad_norm": 7.314230183760376, "learning_rate": 7.50479441256081e-06, "loss": 17.8109, "step": 19312 }, { "epoch": 0.3530261209717221, "grad_norm": 6.0105027388851155, "learning_rate": 7.504538218744617e-06, "loss": 17.4281, "step": 19313 }, { "epoch": 0.3530444001681686, "grad_norm": 6.292850551068649, "learning_rate": 7.504282016150235e-06, "loss": 17.6397, "step": 19314 }, { "epoch": 0.3530626793646151, "grad_norm": 7.011075822188665, "learning_rate": 7.504025804778561e-06, "loss": 17.8955, "step": 19315 }, { "epoch": 0.35308095856106164, "grad_norm": 6.904144514818356, "learning_rate": 7.503769584630495e-06, "loss": 17.6697, "step": 19316 }, { "epoch": 0.3530992377575082, "grad_norm": 7.152090942929883, "learning_rate": 7.503513355706934e-06, "loss": 18.0321, "step": 19317 }, { "epoch": 0.3531175169539547, "grad_norm": 6.4690979081874485, "learning_rate": 7.5032571180087756e-06, "loss": 17.5206, "step": 19318 }, { "epoch": 0.35313579615040125, "grad_norm": 6.742357135324228, "learning_rate": 7.5030008715369175e-06, "loss": 17.6227, "step": 19319 }, { "epoch": 0.35315407534684773, "grad_norm": 6.9498528970034394, "learning_rate": 7.502744616292259e-06, "loss": 17.718, "step": 19320 }, { "epoch": 0.35317235454329426, "grad_norm": 5.8268840292535256, "learning_rate": 7.502488352275697e-06, "loss": 17.4104, "step": 19321 }, { "epoch": 0.3531906337397408, "grad_norm": 7.403693368819702, "learning_rate": 7.502232079488132e-06, "loss": 17.4577, "step": 19322 }, { "epoch": 0.35320891293618734, "grad_norm": 6.124948314523103, "learning_rate": 7.5019757979304594e-06, "loss": 17.3621, "step": 19323 }, { "epoch": 0.35322719213263387, "grad_norm": 5.397086258924638, "learning_rate": 7.50171950760358e-06, "loss": 17.0504, "step": 19324 }, { "epoch": 0.35324547132908035, "grad_norm": 6.130934468575067, "learning_rate": 7.5014632085083905e-06, "loss": 17.3744, "step": 19325 }, { "epoch": 0.3532637505255269, "grad_norm": 5.84133944166528, "learning_rate": 7.5012069006457876e-06, "loss": 17.518, "step": 19326 }, { "epoch": 0.3532820297219734, "grad_norm": 6.435103065004983, "learning_rate": 7.500950584016675e-06, "loss": 17.6368, "step": 19327 }, { "epoch": 0.35330030891841996, "grad_norm": 6.361098036967545, "learning_rate": 7.500694258621946e-06, "loss": 17.4724, "step": 19328 }, { "epoch": 0.3533185881148665, "grad_norm": 6.2022596023931325, "learning_rate": 7.500437924462498e-06, "loss": 17.6998, "step": 19329 }, { "epoch": 0.353336867311313, "grad_norm": 5.95565398908481, "learning_rate": 7.500181581539236e-06, "loss": 17.5403, "step": 19330 }, { "epoch": 0.3533551465077595, "grad_norm": 6.4444870534955765, "learning_rate": 7.4999252298530536e-06, "loss": 17.6668, "step": 19331 }, { "epoch": 0.35337342570420605, "grad_norm": 7.226446417006764, "learning_rate": 7.4996688694048496e-06, "loss": 17.999, "step": 19332 }, { "epoch": 0.3533917049006526, "grad_norm": 5.130752996598166, "learning_rate": 7.499412500195522e-06, "loss": 16.8309, "step": 19333 }, { "epoch": 0.3534099840970991, "grad_norm": 6.091716027701422, "learning_rate": 7.499156122225972e-06, "loss": 17.1431, "step": 19334 }, { "epoch": 0.3534282632935456, "grad_norm": 5.577210815893012, "learning_rate": 7.498899735497096e-06, "loss": 17.2686, "step": 19335 }, { "epoch": 0.35344654248999213, "grad_norm": 5.420874767043154, "learning_rate": 7.498643340009793e-06, "loss": 17.2443, "step": 19336 }, { "epoch": 0.35346482168643867, "grad_norm": 7.005754200492961, "learning_rate": 7.498386935764964e-06, "loss": 17.7823, "step": 19337 }, { "epoch": 0.3534831008828852, "grad_norm": 7.014225587626757, "learning_rate": 7.498130522763503e-06, "loss": 17.7229, "step": 19338 }, { "epoch": 0.35350138007933174, "grad_norm": 6.815981578344918, "learning_rate": 7.497874101006312e-06, "loss": 17.8219, "step": 19339 }, { "epoch": 0.3535196592757782, "grad_norm": 6.287199660882772, "learning_rate": 7.497617670494289e-06, "loss": 17.5265, "step": 19340 }, { "epoch": 0.35353793847222476, "grad_norm": 7.457545237236673, "learning_rate": 7.497361231228334e-06, "loss": 18.0378, "step": 19341 }, { "epoch": 0.3535562176686713, "grad_norm": 7.807867387794018, "learning_rate": 7.497104783209343e-06, "loss": 18.1411, "step": 19342 }, { "epoch": 0.3535744968651178, "grad_norm": 5.939464419722728, "learning_rate": 7.496848326438218e-06, "loss": 17.237, "step": 19343 }, { "epoch": 0.3535927760615643, "grad_norm": 5.94933347974999, "learning_rate": 7.496591860915855e-06, "loss": 17.6885, "step": 19344 }, { "epoch": 0.35361105525801084, "grad_norm": 7.62622439160612, "learning_rate": 7.496335386643155e-06, "loss": 18.0454, "step": 19345 }, { "epoch": 0.3536293344544574, "grad_norm": 7.654420929465917, "learning_rate": 7.496078903621016e-06, "loss": 17.9037, "step": 19346 }, { "epoch": 0.3536476136509039, "grad_norm": 6.9677134942444, "learning_rate": 7.495822411850335e-06, "loss": 17.8292, "step": 19347 }, { "epoch": 0.35366589284735045, "grad_norm": 7.397537831439291, "learning_rate": 7.495565911332015e-06, "loss": 18.5911, "step": 19348 }, { "epoch": 0.35368417204379693, "grad_norm": 6.928067092409122, "learning_rate": 7.495309402066954e-06, "loss": 17.9352, "step": 19349 }, { "epoch": 0.35370245124024347, "grad_norm": 6.730765795853856, "learning_rate": 7.495052884056048e-06, "loss": 17.6145, "step": 19350 }, { "epoch": 0.35372073043669, "grad_norm": 8.10738099050842, "learning_rate": 7.4947963573001995e-06, "loss": 17.8017, "step": 19351 }, { "epoch": 0.35373900963313654, "grad_norm": 7.93727278428228, "learning_rate": 7.494539821800305e-06, "loss": 17.7669, "step": 19352 }, { "epoch": 0.35375728882958307, "grad_norm": 7.339848526157841, "learning_rate": 7.494283277557266e-06, "loss": 17.6792, "step": 19353 }, { "epoch": 0.35377556802602955, "grad_norm": 7.201721940676704, "learning_rate": 7.49402672457198e-06, "loss": 17.8781, "step": 19354 }, { "epoch": 0.3537938472224761, "grad_norm": 6.697310098952394, "learning_rate": 7.493770162845348e-06, "loss": 17.5622, "step": 19355 }, { "epoch": 0.3538121264189226, "grad_norm": 8.084516537100795, "learning_rate": 7.493513592378265e-06, "loss": 18.0973, "step": 19356 }, { "epoch": 0.35383040561536916, "grad_norm": 5.1961317299465675, "learning_rate": 7.493257013171636e-06, "loss": 17.1162, "step": 19357 }, { "epoch": 0.3538486848118157, "grad_norm": 6.731553770253061, "learning_rate": 7.493000425226358e-06, "loss": 17.816, "step": 19358 }, { "epoch": 0.3538669640082622, "grad_norm": 6.091521486875263, "learning_rate": 7.492743828543327e-06, "loss": 17.4561, "step": 19359 }, { "epoch": 0.3538852432047087, "grad_norm": 6.8667630201488725, "learning_rate": 7.492487223123448e-06, "loss": 17.7971, "step": 19360 }, { "epoch": 0.35390352240115525, "grad_norm": 6.19592515380013, "learning_rate": 7.492230608967614e-06, "loss": 17.2811, "step": 19361 }, { "epoch": 0.3539218015976018, "grad_norm": 5.807384444053767, "learning_rate": 7.491973986076733e-06, "loss": 17.3417, "step": 19362 }, { "epoch": 0.3539400807940483, "grad_norm": 7.155386757335756, "learning_rate": 7.491717354451695e-06, "loss": 17.7106, "step": 19363 }, { "epoch": 0.3539583599904948, "grad_norm": 6.538612560506778, "learning_rate": 7.491460714093406e-06, "loss": 17.2755, "step": 19364 }, { "epoch": 0.35397663918694133, "grad_norm": 6.458033663280672, "learning_rate": 7.491204065002763e-06, "loss": 17.4271, "step": 19365 }, { "epoch": 0.35399491838338787, "grad_norm": 5.739066204118389, "learning_rate": 7.4909474071806665e-06, "loss": 17.1759, "step": 19366 }, { "epoch": 0.3540131975798344, "grad_norm": 6.692072568599566, "learning_rate": 7.490690740628015e-06, "loss": 17.6206, "step": 19367 }, { "epoch": 0.35403147677628094, "grad_norm": 8.267619576579627, "learning_rate": 7.4904340653457086e-06, "loss": 18.2098, "step": 19368 }, { "epoch": 0.3540497559727274, "grad_norm": 7.1204361353633825, "learning_rate": 7.4901773813346465e-06, "loss": 17.9392, "step": 19369 }, { "epoch": 0.35406803516917396, "grad_norm": 6.691643134926981, "learning_rate": 7.489920688595729e-06, "loss": 17.7207, "step": 19370 }, { "epoch": 0.3540863143656205, "grad_norm": 6.25705543192282, "learning_rate": 7.489663987129855e-06, "loss": 17.4774, "step": 19371 }, { "epoch": 0.354104593562067, "grad_norm": 7.4853794775293885, "learning_rate": 7.489407276937927e-06, "loss": 18.0386, "step": 19372 }, { "epoch": 0.35412287275851356, "grad_norm": 7.476864122292723, "learning_rate": 7.48915055802084e-06, "loss": 18.1652, "step": 19373 }, { "epoch": 0.35414115195496004, "grad_norm": 9.351362439238002, "learning_rate": 7.488893830379498e-06, "loss": 18.3186, "step": 19374 }, { "epoch": 0.3541594311514066, "grad_norm": 8.284016530529174, "learning_rate": 7.4886370940147975e-06, "loss": 18.4558, "step": 19375 }, { "epoch": 0.3541777103478531, "grad_norm": 4.6921299243565455, "learning_rate": 7.4883803489276404e-06, "loss": 16.9383, "step": 19376 }, { "epoch": 0.35419598954429965, "grad_norm": 5.42530176855745, "learning_rate": 7.4881235951189265e-06, "loss": 17.0558, "step": 19377 }, { "epoch": 0.35421426874074613, "grad_norm": 7.705040512252523, "learning_rate": 7.487866832589555e-06, "loss": 17.8432, "step": 19378 }, { "epoch": 0.35423254793719267, "grad_norm": 6.561441805437232, "learning_rate": 7.487610061340427e-06, "loss": 17.3472, "step": 19379 }, { "epoch": 0.3542508271336392, "grad_norm": 5.070628575814306, "learning_rate": 7.48735328137244e-06, "loss": 16.7991, "step": 19380 }, { "epoch": 0.35426910633008574, "grad_norm": 5.672514503446029, "learning_rate": 7.487096492686498e-06, "loss": 16.9956, "step": 19381 }, { "epoch": 0.3542873855265323, "grad_norm": 6.737898024418183, "learning_rate": 7.486839695283497e-06, "loss": 17.5229, "step": 19382 }, { "epoch": 0.35430566472297875, "grad_norm": 6.182176476803175, "learning_rate": 7.486582889164338e-06, "loss": 17.5448, "step": 19383 }, { "epoch": 0.3543239439194253, "grad_norm": 8.33687576037172, "learning_rate": 7.486326074329923e-06, "loss": 18.6737, "step": 19384 }, { "epoch": 0.3543422231158718, "grad_norm": 5.595973665793096, "learning_rate": 7.48606925078115e-06, "loss": 17.1997, "step": 19385 }, { "epoch": 0.35436050231231836, "grad_norm": 5.926973625388309, "learning_rate": 7.4858124185189215e-06, "loss": 17.1603, "step": 19386 }, { "epoch": 0.3543787815087649, "grad_norm": 5.435840466787997, "learning_rate": 7.485555577544136e-06, "loss": 17.1007, "step": 19387 }, { "epoch": 0.3543970607052114, "grad_norm": 7.000065925848345, "learning_rate": 7.4852987278576915e-06, "loss": 17.7634, "step": 19388 }, { "epoch": 0.3544153399016579, "grad_norm": 6.415895711266425, "learning_rate": 7.485041869460493e-06, "loss": 17.4346, "step": 19389 }, { "epoch": 0.35443361909810445, "grad_norm": 6.9250974739440645, "learning_rate": 7.4847850023534375e-06, "loss": 17.7694, "step": 19390 }, { "epoch": 0.354451898294551, "grad_norm": 6.543538726229283, "learning_rate": 7.484528126537426e-06, "loss": 17.6611, "step": 19391 }, { "epoch": 0.3544701774909975, "grad_norm": 5.240844858653111, "learning_rate": 7.484271242013359e-06, "loss": 16.8667, "step": 19392 }, { "epoch": 0.354488456687444, "grad_norm": 6.863256791947771, "learning_rate": 7.484014348782138e-06, "loss": 17.4832, "step": 19393 }, { "epoch": 0.35450673588389053, "grad_norm": 6.642713304065364, "learning_rate": 7.483757446844661e-06, "loss": 17.7014, "step": 19394 }, { "epoch": 0.35452501508033707, "grad_norm": 6.849159659362434, "learning_rate": 7.48350053620183e-06, "loss": 17.4561, "step": 19395 }, { "epoch": 0.3545432942767836, "grad_norm": 5.535302093579434, "learning_rate": 7.4832436168545466e-06, "loss": 17.1929, "step": 19396 }, { "epoch": 0.35456157347323014, "grad_norm": 6.1383064259059, "learning_rate": 7.4829866888037065e-06, "loss": 17.3695, "step": 19397 }, { "epoch": 0.3545798526696766, "grad_norm": 6.16842294986674, "learning_rate": 7.482729752050215e-06, "loss": 17.5157, "step": 19398 }, { "epoch": 0.35459813186612316, "grad_norm": 6.435334878102475, "learning_rate": 7.4824728065949735e-06, "loss": 17.6294, "step": 19399 }, { "epoch": 0.3546164110625697, "grad_norm": 7.209909555953054, "learning_rate": 7.482215852438878e-06, "loss": 17.7161, "step": 19400 }, { "epoch": 0.3546346902590162, "grad_norm": 5.889956488909761, "learning_rate": 7.481958889582832e-06, "loss": 17.2855, "step": 19401 }, { "epoch": 0.35465296945546276, "grad_norm": 7.656774016906944, "learning_rate": 7.481701918027734e-06, "loss": 18.104, "step": 19402 }, { "epoch": 0.35467124865190924, "grad_norm": 8.024126407579462, "learning_rate": 7.481444937774488e-06, "loss": 18.0196, "step": 19403 }, { "epoch": 0.3546895278483558, "grad_norm": 7.285299984162822, "learning_rate": 7.4811879488239915e-06, "loss": 17.7149, "step": 19404 }, { "epoch": 0.3547078070448023, "grad_norm": 6.37127872968481, "learning_rate": 7.480930951177148e-06, "loss": 17.4418, "step": 19405 }, { "epoch": 0.35472608624124885, "grad_norm": 7.470571138470041, "learning_rate": 7.480673944834856e-06, "loss": 17.9877, "step": 19406 }, { "epoch": 0.3547443654376954, "grad_norm": 8.185439832267216, "learning_rate": 7.480416929798016e-06, "loss": 17.9194, "step": 19407 }, { "epoch": 0.35476264463414187, "grad_norm": 5.043994100916689, "learning_rate": 7.480159906067531e-06, "loss": 17.0113, "step": 19408 }, { "epoch": 0.3547809238305884, "grad_norm": 6.712227558010779, "learning_rate": 7.479902873644301e-06, "loss": 17.8045, "step": 19409 }, { "epoch": 0.35479920302703494, "grad_norm": 7.727220735557964, "learning_rate": 7.479645832529225e-06, "loss": 18.0885, "step": 19410 }, { "epoch": 0.3548174822234815, "grad_norm": 7.878353648153295, "learning_rate": 7.479388782723208e-06, "loss": 17.9703, "step": 19411 }, { "epoch": 0.35483576141992795, "grad_norm": 7.830284306740343, "learning_rate": 7.479131724227147e-06, "loss": 17.9463, "step": 19412 }, { "epoch": 0.3548540406163745, "grad_norm": 6.335659283153983, "learning_rate": 7.4788746570419454e-06, "loss": 17.2787, "step": 19413 }, { "epoch": 0.354872319812821, "grad_norm": 6.30472829977409, "learning_rate": 7.478617581168503e-06, "loss": 17.2966, "step": 19414 }, { "epoch": 0.35489059900926756, "grad_norm": 6.574696312691399, "learning_rate": 7.478360496607719e-06, "loss": 17.8703, "step": 19415 }, { "epoch": 0.3549088782057141, "grad_norm": 6.392912622945608, "learning_rate": 7.478103403360498e-06, "loss": 17.1885, "step": 19416 }, { "epoch": 0.3549271574021606, "grad_norm": 6.15821031640931, "learning_rate": 7.477846301427741e-06, "loss": 17.4841, "step": 19417 }, { "epoch": 0.3549454365986071, "grad_norm": 5.8032289235700905, "learning_rate": 7.4775891908103456e-06, "loss": 17.2369, "step": 19418 }, { "epoch": 0.35496371579505365, "grad_norm": 7.8978308302965, "learning_rate": 7.477332071509217e-06, "loss": 18.2384, "step": 19419 }, { "epoch": 0.3549819949915002, "grad_norm": 7.281175734484118, "learning_rate": 7.477074943525253e-06, "loss": 17.8954, "step": 19420 }, { "epoch": 0.3550002741879467, "grad_norm": 6.277923838885113, "learning_rate": 7.476817806859357e-06, "loss": 17.3499, "step": 19421 }, { "epoch": 0.3550185533843932, "grad_norm": 7.200947809816378, "learning_rate": 7.476560661512429e-06, "loss": 17.6936, "step": 19422 }, { "epoch": 0.35503683258083973, "grad_norm": 7.9194955302842756, "learning_rate": 7.476303507485371e-06, "loss": 17.7159, "step": 19423 }, { "epoch": 0.35505511177728627, "grad_norm": 5.766785049922964, "learning_rate": 7.4760463447790844e-06, "loss": 17.3214, "step": 19424 }, { "epoch": 0.3550733909737328, "grad_norm": 7.358756659132915, "learning_rate": 7.47578917339447e-06, "loss": 18.048, "step": 19425 }, { "epoch": 0.35509167017017934, "grad_norm": 7.4608818189334, "learning_rate": 7.475531993332429e-06, "loss": 17.7924, "step": 19426 }, { "epoch": 0.3551099493666258, "grad_norm": 6.007732917341111, "learning_rate": 7.475274804593864e-06, "loss": 17.2599, "step": 19427 }, { "epoch": 0.35512822856307236, "grad_norm": 7.804206020385914, "learning_rate": 7.475017607179676e-06, "loss": 18.2085, "step": 19428 }, { "epoch": 0.3551465077595189, "grad_norm": 6.8839181908048666, "learning_rate": 7.474760401090764e-06, "loss": 17.9312, "step": 19429 }, { "epoch": 0.35516478695596543, "grad_norm": 7.304411432808902, "learning_rate": 7.474503186328033e-06, "loss": 17.8743, "step": 19430 }, { "epoch": 0.35518306615241196, "grad_norm": 7.846221445858971, "learning_rate": 7.474245962892382e-06, "loss": 18.1887, "step": 19431 }, { "epoch": 0.35520134534885844, "grad_norm": 6.601758236885788, "learning_rate": 7.4739887307847145e-06, "loss": 17.3935, "step": 19432 }, { "epoch": 0.355219624545305, "grad_norm": 5.638614361581859, "learning_rate": 7.473731490005931e-06, "loss": 17.2594, "step": 19433 }, { "epoch": 0.3552379037417515, "grad_norm": 8.1558423597002, "learning_rate": 7.473474240556934e-06, "loss": 18.1723, "step": 19434 }, { "epoch": 0.35525618293819805, "grad_norm": 6.212145101632636, "learning_rate": 7.473216982438624e-06, "loss": 17.3857, "step": 19435 }, { "epoch": 0.3552744621346446, "grad_norm": 6.131519220006103, "learning_rate": 7.472959715651902e-06, "loss": 17.4338, "step": 19436 }, { "epoch": 0.35529274133109107, "grad_norm": 5.871994721306829, "learning_rate": 7.472702440197672e-06, "loss": 17.0883, "step": 19437 }, { "epoch": 0.3553110205275376, "grad_norm": 6.403552947923738, "learning_rate": 7.472445156076834e-06, "loss": 17.5859, "step": 19438 }, { "epoch": 0.35532929972398414, "grad_norm": 8.193769561087173, "learning_rate": 7.47218786329029e-06, "loss": 18.3522, "step": 19439 }, { "epoch": 0.3553475789204307, "grad_norm": 5.705550408018775, "learning_rate": 7.471930561838943e-06, "loss": 17.3153, "step": 19440 }, { "epoch": 0.3553658581168772, "grad_norm": 5.943444147922853, "learning_rate": 7.471673251723694e-06, "loss": 17.3869, "step": 19441 }, { "epoch": 0.3553841373133237, "grad_norm": 6.622891918132139, "learning_rate": 7.471415932945443e-06, "loss": 17.8957, "step": 19442 }, { "epoch": 0.3554024165097702, "grad_norm": 7.742108703211141, "learning_rate": 7.4711586055050944e-06, "loss": 18.1735, "step": 19443 }, { "epoch": 0.35542069570621676, "grad_norm": 5.842591222976607, "learning_rate": 7.470901269403551e-06, "loss": 17.3396, "step": 19444 }, { "epoch": 0.3554389749026633, "grad_norm": 5.9068511967150705, "learning_rate": 7.470643924641712e-06, "loss": 17.2643, "step": 19445 }, { "epoch": 0.3554572540991098, "grad_norm": 6.018712598958013, "learning_rate": 7.47038657122048e-06, "loss": 17.3815, "step": 19446 }, { "epoch": 0.3554755332955563, "grad_norm": 7.499780665216952, "learning_rate": 7.470129209140756e-06, "loss": 18.296, "step": 19447 }, { "epoch": 0.35549381249200285, "grad_norm": 9.515142214114233, "learning_rate": 7.469871838403446e-06, "loss": 17.596, "step": 19448 }, { "epoch": 0.3555120916884494, "grad_norm": 5.112666074246406, "learning_rate": 7.469614459009449e-06, "loss": 16.9713, "step": 19449 }, { "epoch": 0.3555303708848959, "grad_norm": 7.224288794638123, "learning_rate": 7.469357070959667e-06, "loss": 17.7355, "step": 19450 }, { "epoch": 0.3555486500813424, "grad_norm": 7.842872194239781, "learning_rate": 7.469099674255002e-06, "loss": 17.8648, "step": 19451 }, { "epoch": 0.35556692927778893, "grad_norm": 6.537318214223713, "learning_rate": 7.468842268896359e-06, "loss": 17.4829, "step": 19452 }, { "epoch": 0.35558520847423547, "grad_norm": 5.624756200968691, "learning_rate": 7.468584854884636e-06, "loss": 17.1853, "step": 19453 }, { "epoch": 0.355603487670682, "grad_norm": 6.1704492959040245, "learning_rate": 7.468327432220739e-06, "loss": 17.1741, "step": 19454 }, { "epoch": 0.35562176686712854, "grad_norm": 5.6975740937579635, "learning_rate": 7.468070000905568e-06, "loss": 17.1753, "step": 19455 }, { "epoch": 0.355640046063575, "grad_norm": 7.813950691810959, "learning_rate": 7.467812560940025e-06, "loss": 18.2112, "step": 19456 }, { "epoch": 0.35565832526002156, "grad_norm": 5.335502955682161, "learning_rate": 7.467555112325013e-06, "loss": 17.0313, "step": 19457 }, { "epoch": 0.3556766044564681, "grad_norm": 6.446712609176233, "learning_rate": 7.467297655061437e-06, "loss": 17.4101, "step": 19458 }, { "epoch": 0.35569488365291463, "grad_norm": 6.1874619697485365, "learning_rate": 7.467040189150194e-06, "loss": 17.3346, "step": 19459 }, { "epoch": 0.35571316284936116, "grad_norm": 6.018356849341548, "learning_rate": 7.466782714592191e-06, "loss": 17.0546, "step": 19460 }, { "epoch": 0.35573144204580764, "grad_norm": 5.294735414932873, "learning_rate": 7.466525231388327e-06, "loss": 17.1645, "step": 19461 }, { "epoch": 0.3557497212422542, "grad_norm": 6.469328490918808, "learning_rate": 7.4662677395395074e-06, "loss": 17.5357, "step": 19462 }, { "epoch": 0.3557680004387007, "grad_norm": 6.282228377007154, "learning_rate": 7.466010239046632e-06, "loss": 17.6503, "step": 19463 }, { "epoch": 0.35578627963514725, "grad_norm": 6.608266845938125, "learning_rate": 7.465752729910607e-06, "loss": 17.4305, "step": 19464 }, { "epoch": 0.3558045588315938, "grad_norm": 6.311324031626701, "learning_rate": 7.465495212132331e-06, "loss": 17.7267, "step": 19465 }, { "epoch": 0.35582283802804027, "grad_norm": 6.094071624844103, "learning_rate": 7.465237685712708e-06, "loss": 17.3744, "step": 19466 }, { "epoch": 0.3558411172244868, "grad_norm": 7.621146830460536, "learning_rate": 7.464980150652642e-06, "loss": 17.8176, "step": 19467 }, { "epoch": 0.35585939642093334, "grad_norm": 7.217791900838364, "learning_rate": 7.464722606953034e-06, "loss": 18.1047, "step": 19468 }, { "epoch": 0.3558776756173799, "grad_norm": 6.719686606133383, "learning_rate": 7.4644650546147875e-06, "loss": 17.65, "step": 19469 }, { "epoch": 0.3558959548138264, "grad_norm": 5.859634824966476, "learning_rate": 7.464207493638803e-06, "loss": 17.4257, "step": 19470 }, { "epoch": 0.3559142340102729, "grad_norm": 7.003694273833317, "learning_rate": 7.463949924025987e-06, "loss": 17.9359, "step": 19471 }, { "epoch": 0.3559325132067194, "grad_norm": 6.715987847591053, "learning_rate": 7.463692345777241e-06, "loss": 17.4042, "step": 19472 }, { "epoch": 0.35595079240316596, "grad_norm": 6.6440791916900235, "learning_rate": 7.463434758893465e-06, "loss": 17.2459, "step": 19473 }, { "epoch": 0.3559690715996125, "grad_norm": 6.703365635526031, "learning_rate": 7.4631771633755645e-06, "loss": 17.6477, "step": 19474 }, { "epoch": 0.35598735079605903, "grad_norm": 7.42266547463853, "learning_rate": 7.462919559224442e-06, "loss": 18.0435, "step": 19475 }, { "epoch": 0.3560056299925055, "grad_norm": 6.53594389417586, "learning_rate": 7.462661946441001e-06, "loss": 17.4178, "step": 19476 }, { "epoch": 0.35602390918895205, "grad_norm": 5.858822877464721, "learning_rate": 7.462404325026142e-06, "loss": 17.5212, "step": 19477 }, { "epoch": 0.3560421883853986, "grad_norm": 6.326782648560232, "learning_rate": 7.46214669498077e-06, "loss": 17.1662, "step": 19478 }, { "epoch": 0.3560604675818451, "grad_norm": 7.593968420428665, "learning_rate": 7.461889056305789e-06, "loss": 17.7736, "step": 19479 }, { "epoch": 0.3560787467782916, "grad_norm": 5.912228012846317, "learning_rate": 7.4616314090020995e-06, "loss": 17.0636, "step": 19480 }, { "epoch": 0.35609702597473814, "grad_norm": 6.369144309951476, "learning_rate": 7.461373753070605e-06, "loss": 17.5882, "step": 19481 }, { "epoch": 0.35611530517118467, "grad_norm": 8.272102536758139, "learning_rate": 7.4611160885122105e-06, "loss": 17.848, "step": 19482 }, { "epoch": 0.3561335843676312, "grad_norm": 6.165283653648815, "learning_rate": 7.460858415327815e-06, "loss": 17.2829, "step": 19483 }, { "epoch": 0.35615186356407774, "grad_norm": 6.53035454580862, "learning_rate": 7.460600733518326e-06, "loss": 17.4869, "step": 19484 }, { "epoch": 0.3561701427605242, "grad_norm": 6.209285409291938, "learning_rate": 7.460343043084645e-06, "loss": 17.4252, "step": 19485 }, { "epoch": 0.35618842195697076, "grad_norm": 6.619422600433882, "learning_rate": 7.460085344027675e-06, "loss": 17.3904, "step": 19486 }, { "epoch": 0.3562067011534173, "grad_norm": 6.954155104690954, "learning_rate": 7.45982763634832e-06, "loss": 17.5735, "step": 19487 }, { "epoch": 0.35622498034986383, "grad_norm": 8.274983654871239, "learning_rate": 7.45956992004748e-06, "loss": 18.6415, "step": 19488 }, { "epoch": 0.35624325954631036, "grad_norm": 6.702428815338919, "learning_rate": 7.459312195126064e-06, "loss": 17.6304, "step": 19489 }, { "epoch": 0.35626153874275684, "grad_norm": 5.848455807410714, "learning_rate": 7.459054461584971e-06, "loss": 17.2761, "step": 19490 }, { "epoch": 0.3562798179392034, "grad_norm": 6.183570962589923, "learning_rate": 7.458796719425106e-06, "loss": 17.489, "step": 19491 }, { "epoch": 0.3562980971356499, "grad_norm": 6.0344839279626195, "learning_rate": 7.458538968647371e-06, "loss": 17.5043, "step": 19492 }, { "epoch": 0.35631637633209645, "grad_norm": 7.7679328374348735, "learning_rate": 7.45828120925267e-06, "loss": 18.2713, "step": 19493 }, { "epoch": 0.356334655528543, "grad_norm": 7.188328960680923, "learning_rate": 7.458023441241907e-06, "loss": 17.872, "step": 19494 }, { "epoch": 0.35635293472498947, "grad_norm": 6.708241420470331, "learning_rate": 7.457765664615986e-06, "loss": 17.8736, "step": 19495 }, { "epoch": 0.356371213921436, "grad_norm": 5.880791325612288, "learning_rate": 7.4575078793758095e-06, "loss": 17.4544, "step": 19496 }, { "epoch": 0.35638949311788254, "grad_norm": 7.228093828812865, "learning_rate": 7.4572500855222795e-06, "loss": 17.7935, "step": 19497 }, { "epoch": 0.3564077723143291, "grad_norm": 7.389873482180641, "learning_rate": 7.456992283056302e-06, "loss": 17.7376, "step": 19498 }, { "epoch": 0.3564260515107756, "grad_norm": 6.079701279278257, "learning_rate": 7.456734471978782e-06, "loss": 17.4537, "step": 19499 }, { "epoch": 0.3564443307072221, "grad_norm": 6.49756636901589, "learning_rate": 7.456476652290619e-06, "loss": 17.8027, "step": 19500 }, { "epoch": 0.3564626099036686, "grad_norm": 8.343409896559569, "learning_rate": 7.456218823992718e-06, "loss": 18.3613, "step": 19501 }, { "epoch": 0.35648088910011516, "grad_norm": 7.275233240696608, "learning_rate": 7.455960987085982e-06, "loss": 18.1414, "step": 19502 }, { "epoch": 0.3564991682965617, "grad_norm": 5.746313584875896, "learning_rate": 7.4557031415713185e-06, "loss": 17.3823, "step": 19503 }, { "epoch": 0.35651744749300823, "grad_norm": 7.209214987742282, "learning_rate": 7.455445287449627e-06, "loss": 17.9968, "step": 19504 }, { "epoch": 0.3565357266894547, "grad_norm": 5.615786379902083, "learning_rate": 7.455187424721814e-06, "loss": 17.1986, "step": 19505 }, { "epoch": 0.35655400588590125, "grad_norm": 6.919847812923979, "learning_rate": 7.454929553388781e-06, "loss": 17.5477, "step": 19506 }, { "epoch": 0.3565722850823478, "grad_norm": 6.458199993844874, "learning_rate": 7.454671673451434e-06, "loss": 17.3889, "step": 19507 }, { "epoch": 0.3565905642787943, "grad_norm": 5.492010412363909, "learning_rate": 7.454413784910675e-06, "loss": 17.189, "step": 19508 }, { "epoch": 0.35660884347524086, "grad_norm": 5.620456516038945, "learning_rate": 7.454155887767409e-06, "loss": 17.3132, "step": 19509 }, { "epoch": 0.35662712267168734, "grad_norm": 7.333231458293356, "learning_rate": 7.453897982022539e-06, "loss": 17.9381, "step": 19510 }, { "epoch": 0.35664540186813387, "grad_norm": 5.895555401955035, "learning_rate": 7.453640067676971e-06, "loss": 17.6563, "step": 19511 }, { "epoch": 0.3566636810645804, "grad_norm": 6.095911156600389, "learning_rate": 7.4533821447316045e-06, "loss": 17.2515, "step": 19512 }, { "epoch": 0.35668196026102694, "grad_norm": 5.91007587085715, "learning_rate": 7.4531242131873505e-06, "loss": 17.2879, "step": 19513 }, { "epoch": 0.3567002394574734, "grad_norm": 7.874624189571801, "learning_rate": 7.452866273045106e-06, "loss": 17.926, "step": 19514 }, { "epoch": 0.35671851865391996, "grad_norm": 6.741908116251173, "learning_rate": 7.452608324305779e-06, "loss": 17.5684, "step": 19515 }, { "epoch": 0.3567367978503665, "grad_norm": 5.734303574903024, "learning_rate": 7.452350366970273e-06, "loss": 16.9621, "step": 19516 }, { "epoch": 0.35675507704681303, "grad_norm": 6.827994662104138, "learning_rate": 7.452092401039491e-06, "loss": 17.8539, "step": 19517 }, { "epoch": 0.35677335624325957, "grad_norm": 5.554487343322766, "learning_rate": 7.451834426514339e-06, "loss": 17.2516, "step": 19518 }, { "epoch": 0.35679163543970605, "grad_norm": 8.996661982365572, "learning_rate": 7.4515764433957195e-06, "loss": 18.6347, "step": 19519 }, { "epoch": 0.3568099146361526, "grad_norm": 5.777592153915319, "learning_rate": 7.4513184516845376e-06, "loss": 17.1962, "step": 19520 }, { "epoch": 0.3568281938325991, "grad_norm": 6.6244143733216205, "learning_rate": 7.451060451381696e-06, "loss": 17.6402, "step": 19521 }, { "epoch": 0.35684647302904565, "grad_norm": 5.736463429171136, "learning_rate": 7.450802442488101e-06, "loss": 17.448, "step": 19522 }, { "epoch": 0.3568647522254922, "grad_norm": 6.225464066752719, "learning_rate": 7.450544425004657e-06, "loss": 17.4283, "step": 19523 }, { "epoch": 0.35688303142193867, "grad_norm": 5.557820621065601, "learning_rate": 7.450286398932266e-06, "loss": 17.2756, "step": 19524 }, { "epoch": 0.3569013106183852, "grad_norm": 7.261448883357131, "learning_rate": 7.450028364271834e-06, "loss": 17.9538, "step": 19525 }, { "epoch": 0.35691958981483174, "grad_norm": 7.418123239235833, "learning_rate": 7.449770321024265e-06, "loss": 17.8033, "step": 19526 }, { "epoch": 0.3569378690112783, "grad_norm": 6.38774604313727, "learning_rate": 7.449512269190466e-06, "loss": 17.5972, "step": 19527 }, { "epoch": 0.3569561482077248, "grad_norm": 7.360832041667508, "learning_rate": 7.449254208771337e-06, "loss": 18.2713, "step": 19528 }, { "epoch": 0.3569744274041713, "grad_norm": 5.408830883075092, "learning_rate": 7.4489961397677835e-06, "loss": 17.1253, "step": 19529 }, { "epoch": 0.3569927066006178, "grad_norm": 6.239119904201538, "learning_rate": 7.4487380621807125e-06, "loss": 17.3757, "step": 19530 }, { "epoch": 0.35701098579706436, "grad_norm": 5.891247481157093, "learning_rate": 7.4484799760110285e-06, "loss": 17.1889, "step": 19531 }, { "epoch": 0.3570292649935109, "grad_norm": 6.40253611384729, "learning_rate": 7.448221881259633e-06, "loss": 17.7897, "step": 19532 }, { "epoch": 0.35704754418995743, "grad_norm": 6.509498408766673, "learning_rate": 7.4479637779274315e-06, "loss": 17.5238, "step": 19533 }, { "epoch": 0.3570658233864039, "grad_norm": 5.435814259320647, "learning_rate": 7.44770566601533e-06, "loss": 16.9849, "step": 19534 }, { "epoch": 0.35708410258285045, "grad_norm": 6.373180457230543, "learning_rate": 7.447447545524234e-06, "loss": 17.5635, "step": 19535 }, { "epoch": 0.357102381779297, "grad_norm": 6.064176241219916, "learning_rate": 7.447189416455045e-06, "loss": 17.3874, "step": 19536 }, { "epoch": 0.3571206609757435, "grad_norm": 7.94021393627077, "learning_rate": 7.44693127880867e-06, "loss": 18.064, "step": 19537 }, { "epoch": 0.35713894017219006, "grad_norm": 6.717605353286808, "learning_rate": 7.446673132586013e-06, "loss": 17.7664, "step": 19538 }, { "epoch": 0.35715721936863654, "grad_norm": 5.421782166907727, "learning_rate": 7.446414977787979e-06, "loss": 17.1765, "step": 19539 }, { "epoch": 0.35717549856508307, "grad_norm": 6.563266448417799, "learning_rate": 7.446156814415472e-06, "loss": 17.6065, "step": 19540 }, { "epoch": 0.3571937777615296, "grad_norm": 5.986476597498721, "learning_rate": 7.4458986424694e-06, "loss": 17.385, "step": 19541 }, { "epoch": 0.35721205695797614, "grad_norm": 6.529516511890925, "learning_rate": 7.445640461950664e-06, "loss": 17.4231, "step": 19542 }, { "epoch": 0.3572303361544227, "grad_norm": 7.215770537126524, "learning_rate": 7.4453822728601695e-06, "loss": 17.7901, "step": 19543 }, { "epoch": 0.35724861535086916, "grad_norm": 6.153997687868142, "learning_rate": 7.445124075198824e-06, "loss": 17.3847, "step": 19544 }, { "epoch": 0.3572668945473157, "grad_norm": 6.406699556092609, "learning_rate": 7.44486586896753e-06, "loss": 17.5249, "step": 19545 }, { "epoch": 0.35728517374376223, "grad_norm": 5.122915638185464, "learning_rate": 7.444607654167194e-06, "loss": 17.0643, "step": 19546 }, { "epoch": 0.35730345294020877, "grad_norm": 6.14367321722611, "learning_rate": 7.444349430798718e-06, "loss": 17.3365, "step": 19547 }, { "epoch": 0.35732173213665525, "grad_norm": 6.8760673202196125, "learning_rate": 7.444091198863012e-06, "loss": 17.8723, "step": 19548 }, { "epoch": 0.3573400113331018, "grad_norm": 9.740963472137814, "learning_rate": 7.4438329583609785e-06, "loss": 19.0003, "step": 19549 }, { "epoch": 0.3573582905295483, "grad_norm": 6.189180487315558, "learning_rate": 7.443574709293522e-06, "loss": 17.4863, "step": 19550 }, { "epoch": 0.35737656972599485, "grad_norm": 7.1232218886265555, "learning_rate": 7.443316451661546e-06, "loss": 17.9244, "step": 19551 }, { "epoch": 0.3573948489224414, "grad_norm": 6.644580298661404, "learning_rate": 7.4430581854659615e-06, "loss": 17.6901, "step": 19552 }, { "epoch": 0.35741312811888787, "grad_norm": 7.33589492496623, "learning_rate": 7.442799910707667e-06, "loss": 17.77, "step": 19553 }, { "epoch": 0.3574314073153344, "grad_norm": 7.316198263494221, "learning_rate": 7.442541627387572e-06, "loss": 18.1297, "step": 19554 }, { "epoch": 0.35744968651178094, "grad_norm": 7.166057386822801, "learning_rate": 7.442283335506582e-06, "loss": 17.8345, "step": 19555 }, { "epoch": 0.3574679657082275, "grad_norm": 6.646128805745733, "learning_rate": 7.442025035065598e-06, "loss": 17.4866, "step": 19556 }, { "epoch": 0.357486244904674, "grad_norm": 6.057776358116597, "learning_rate": 7.441766726065529e-06, "loss": 17.4085, "step": 19557 }, { "epoch": 0.3575045241011205, "grad_norm": 6.231824837767921, "learning_rate": 7.441508408507281e-06, "loss": 17.4139, "step": 19558 }, { "epoch": 0.357522803297567, "grad_norm": 6.982239309763197, "learning_rate": 7.441250082391756e-06, "loss": 18.093, "step": 19559 }, { "epoch": 0.35754108249401356, "grad_norm": 7.365303753528583, "learning_rate": 7.440991747719863e-06, "loss": 17.7381, "step": 19560 }, { "epoch": 0.3575593616904601, "grad_norm": 6.102411056837014, "learning_rate": 7.440733404492504e-06, "loss": 17.3601, "step": 19561 }, { "epoch": 0.35757764088690663, "grad_norm": 8.092326201907957, "learning_rate": 7.4404750527105885e-06, "loss": 18.417, "step": 19562 }, { "epoch": 0.3575959200833531, "grad_norm": 5.923153029325546, "learning_rate": 7.440216692375017e-06, "loss": 17.0967, "step": 19563 }, { "epoch": 0.35761419927979965, "grad_norm": 6.1980079939452795, "learning_rate": 7.4399583234867005e-06, "loss": 17.4764, "step": 19564 }, { "epoch": 0.3576324784762462, "grad_norm": 6.10555734087733, "learning_rate": 7.43969994604654e-06, "loss": 17.4802, "step": 19565 }, { "epoch": 0.3576507576726927, "grad_norm": 5.356375823423826, "learning_rate": 7.439441560055443e-06, "loss": 17.0387, "step": 19566 }, { "epoch": 0.35766903686913926, "grad_norm": 5.8036224360158135, "learning_rate": 7.4391831655143155e-06, "loss": 17.2135, "step": 19567 }, { "epoch": 0.35768731606558574, "grad_norm": 6.727786201025457, "learning_rate": 7.4389247624240635e-06, "loss": 17.4191, "step": 19568 }, { "epoch": 0.35770559526203227, "grad_norm": 6.942808237998604, "learning_rate": 7.43866635078559e-06, "loss": 17.7536, "step": 19569 }, { "epoch": 0.3577238744584788, "grad_norm": 7.271246304521421, "learning_rate": 7.438407930599802e-06, "loss": 17.9878, "step": 19570 }, { "epoch": 0.35774215365492534, "grad_norm": 7.343102527902273, "learning_rate": 7.438149501867609e-06, "loss": 17.6091, "step": 19571 }, { "epoch": 0.3577604328513719, "grad_norm": 7.014358727902585, "learning_rate": 7.437891064589912e-06, "loss": 17.4978, "step": 19572 }, { "epoch": 0.35777871204781836, "grad_norm": 6.316023574272729, "learning_rate": 7.437632618767619e-06, "loss": 17.6849, "step": 19573 }, { "epoch": 0.3577969912442649, "grad_norm": 7.1173021736225, "learning_rate": 7.437374164401632e-06, "loss": 18.0485, "step": 19574 }, { "epoch": 0.35781527044071143, "grad_norm": 6.922034267789044, "learning_rate": 7.437115701492863e-06, "loss": 17.6598, "step": 19575 }, { "epoch": 0.35783354963715797, "grad_norm": 6.941606087333205, "learning_rate": 7.436857230042215e-06, "loss": 17.7257, "step": 19576 }, { "epoch": 0.3578518288336045, "grad_norm": 5.806717517611175, "learning_rate": 7.436598750050593e-06, "loss": 17.2348, "step": 19577 }, { "epoch": 0.357870108030051, "grad_norm": 7.204718588209834, "learning_rate": 7.436340261518904e-06, "loss": 17.6794, "step": 19578 }, { "epoch": 0.3578883872264975, "grad_norm": 5.228357199531201, "learning_rate": 7.436081764448054e-06, "loss": 16.9076, "step": 19579 }, { "epoch": 0.35790666642294405, "grad_norm": 8.135319780959174, "learning_rate": 7.4358232588389475e-06, "loss": 18.3858, "step": 19580 }, { "epoch": 0.3579249456193906, "grad_norm": 6.395997239594845, "learning_rate": 7.435564744692494e-06, "loss": 17.4644, "step": 19581 }, { "epoch": 0.35794322481583707, "grad_norm": 6.621546714164927, "learning_rate": 7.435306222009597e-06, "loss": 17.5496, "step": 19582 }, { "epoch": 0.3579615040122836, "grad_norm": 7.346892370167702, "learning_rate": 7.435047690791162e-06, "loss": 17.5626, "step": 19583 }, { "epoch": 0.35797978320873014, "grad_norm": 5.783952019402652, "learning_rate": 7.434789151038097e-06, "loss": 17.3614, "step": 19584 }, { "epoch": 0.3579980624051767, "grad_norm": 5.760549772703515, "learning_rate": 7.434530602751307e-06, "loss": 17.2567, "step": 19585 }, { "epoch": 0.3580163416016232, "grad_norm": 6.478047570788055, "learning_rate": 7.434272045931698e-06, "loss": 17.4649, "step": 19586 }, { "epoch": 0.3580346207980697, "grad_norm": 6.221514341852532, "learning_rate": 7.434013480580178e-06, "loss": 17.2979, "step": 19587 }, { "epoch": 0.3580528999945162, "grad_norm": 8.038249083522063, "learning_rate": 7.43375490669765e-06, "loss": 18.2009, "step": 19588 }, { "epoch": 0.35807117919096276, "grad_norm": 7.153169603424881, "learning_rate": 7.433496324285023e-06, "loss": 17.8616, "step": 19589 }, { "epoch": 0.3580894583874093, "grad_norm": 6.140821138713422, "learning_rate": 7.433237733343204e-06, "loss": 17.2862, "step": 19590 }, { "epoch": 0.35810773758385583, "grad_norm": 6.303164117025946, "learning_rate": 7.432979133873096e-06, "loss": 17.1239, "step": 19591 }, { "epoch": 0.3581260167803023, "grad_norm": 7.40032118331771, "learning_rate": 7.432720525875608e-06, "loss": 17.7626, "step": 19592 }, { "epoch": 0.35814429597674885, "grad_norm": 5.211355925358332, "learning_rate": 7.432461909351646e-06, "loss": 17.0628, "step": 19593 }, { "epoch": 0.3581625751731954, "grad_norm": 7.288194341296274, "learning_rate": 7.432203284302115e-06, "loss": 18.2211, "step": 19594 }, { "epoch": 0.3581808543696419, "grad_norm": 7.43978040314632, "learning_rate": 7.431944650727924e-06, "loss": 17.9704, "step": 19595 }, { "epoch": 0.35819913356608846, "grad_norm": 8.187597270207503, "learning_rate": 7.431686008629978e-06, "loss": 18.0001, "step": 19596 }, { "epoch": 0.35821741276253494, "grad_norm": 7.247317235545807, "learning_rate": 7.431427358009182e-06, "loss": 17.9215, "step": 19597 }, { "epoch": 0.3582356919589815, "grad_norm": 7.942615379347667, "learning_rate": 7.431168698866444e-06, "loss": 17.7566, "step": 19598 }, { "epoch": 0.358253971155428, "grad_norm": 5.567173124767692, "learning_rate": 7.430910031202673e-06, "loss": 17.1107, "step": 19599 }, { "epoch": 0.35827225035187454, "grad_norm": 6.038390410749337, "learning_rate": 7.430651355018772e-06, "loss": 17.3829, "step": 19600 }, { "epoch": 0.3582905295483211, "grad_norm": 6.185621499809661, "learning_rate": 7.4303926703156484e-06, "loss": 17.3401, "step": 19601 }, { "epoch": 0.35830880874476756, "grad_norm": 6.998107385390362, "learning_rate": 7.43013397709421e-06, "loss": 17.6478, "step": 19602 }, { "epoch": 0.3583270879412141, "grad_norm": 6.423917792766361, "learning_rate": 7.429875275355364e-06, "loss": 17.5313, "step": 19603 }, { "epoch": 0.35834536713766063, "grad_norm": 6.190849407069878, "learning_rate": 7.429616565100014e-06, "loss": 17.6826, "step": 19604 }, { "epoch": 0.35836364633410717, "grad_norm": 6.864104331432642, "learning_rate": 7.429357846329069e-06, "loss": 17.4815, "step": 19605 }, { "epoch": 0.3583819255305537, "grad_norm": 5.633235072213729, "learning_rate": 7.429099119043437e-06, "loss": 17.101, "step": 19606 }, { "epoch": 0.3584002047270002, "grad_norm": 7.173497231752361, "learning_rate": 7.428840383244023e-06, "loss": 17.5816, "step": 19607 }, { "epoch": 0.3584184839234467, "grad_norm": 6.824575995155842, "learning_rate": 7.428581638931734e-06, "loss": 17.7751, "step": 19608 }, { "epoch": 0.35843676311989325, "grad_norm": 7.294082566289569, "learning_rate": 7.428322886107475e-06, "loss": 17.9937, "step": 19609 }, { "epoch": 0.3584550423163398, "grad_norm": 6.336024020206503, "learning_rate": 7.428064124772158e-06, "loss": 17.5486, "step": 19610 }, { "epoch": 0.3584733215127863, "grad_norm": 6.8384419425680045, "learning_rate": 7.4278053549266845e-06, "loss": 17.8061, "step": 19611 }, { "epoch": 0.3584916007092328, "grad_norm": 6.8563408704511835, "learning_rate": 7.427546576571966e-06, "loss": 17.8507, "step": 19612 }, { "epoch": 0.35850987990567934, "grad_norm": 5.672177529612705, "learning_rate": 7.427287789708907e-06, "loss": 17.2885, "step": 19613 }, { "epoch": 0.3585281591021259, "grad_norm": 6.383419232609466, "learning_rate": 7.427028994338414e-06, "loss": 17.3135, "step": 19614 }, { "epoch": 0.3585464382985724, "grad_norm": 7.179162938060904, "learning_rate": 7.426770190461394e-06, "loss": 17.9906, "step": 19615 }, { "epoch": 0.3585647174950189, "grad_norm": 5.885327218747135, "learning_rate": 7.4265113780787575e-06, "loss": 17.1926, "step": 19616 }, { "epoch": 0.3585829966914654, "grad_norm": 6.841343611008343, "learning_rate": 7.426252557191409e-06, "loss": 17.5999, "step": 19617 }, { "epoch": 0.35860127588791196, "grad_norm": 5.706515078920045, "learning_rate": 7.425993727800255e-06, "loss": 17.1247, "step": 19618 }, { "epoch": 0.3586195550843585, "grad_norm": 5.059189919687213, "learning_rate": 7.425734889906203e-06, "loss": 16.9361, "step": 19619 }, { "epoch": 0.35863783428080503, "grad_norm": 5.500086691594506, "learning_rate": 7.425476043510161e-06, "loss": 17.0845, "step": 19620 }, { "epoch": 0.3586561134772515, "grad_norm": 6.27450390437792, "learning_rate": 7.4252171886130365e-06, "loss": 17.4172, "step": 19621 }, { "epoch": 0.35867439267369805, "grad_norm": 5.902742990740609, "learning_rate": 7.424958325215736e-06, "loss": 17.596, "step": 19622 }, { "epoch": 0.3586926718701446, "grad_norm": 6.644123805087707, "learning_rate": 7.424699453319166e-06, "loss": 17.4616, "step": 19623 }, { "epoch": 0.3587109510665911, "grad_norm": 7.793181924074182, "learning_rate": 7.424440572924236e-06, "loss": 17.9932, "step": 19624 }, { "epoch": 0.35872923026303766, "grad_norm": 9.1000473577842, "learning_rate": 7.424181684031853e-06, "loss": 18.4022, "step": 19625 }, { "epoch": 0.35874750945948414, "grad_norm": 6.450341745546176, "learning_rate": 7.423922786642922e-06, "loss": 17.4903, "step": 19626 }, { "epoch": 0.3587657886559307, "grad_norm": 5.945848969936562, "learning_rate": 7.423663880758354e-06, "loss": 17.0628, "step": 19627 }, { "epoch": 0.3587840678523772, "grad_norm": 5.344817447692296, "learning_rate": 7.423404966379052e-06, "loss": 17.0436, "step": 19628 }, { "epoch": 0.35880234704882374, "grad_norm": 6.273733286302565, "learning_rate": 7.4231460435059255e-06, "loss": 17.3068, "step": 19629 }, { "epoch": 0.3588206262452703, "grad_norm": 6.762580780150825, "learning_rate": 7.422887112139884e-06, "loss": 17.5117, "step": 19630 }, { "epoch": 0.35883890544171676, "grad_norm": 12.948570375745069, "learning_rate": 7.422628172281834e-06, "loss": 18.4008, "step": 19631 }, { "epoch": 0.3588571846381633, "grad_norm": 7.659120306546429, "learning_rate": 7.422369223932682e-06, "loss": 18.1467, "step": 19632 }, { "epoch": 0.35887546383460983, "grad_norm": 6.1455289098079176, "learning_rate": 7.422110267093334e-06, "loss": 17.3468, "step": 19633 }, { "epoch": 0.35889374303105637, "grad_norm": 7.165817662349481, "learning_rate": 7.421851301764702e-06, "loss": 17.7761, "step": 19634 }, { "epoch": 0.3589120222275029, "grad_norm": 6.38528561482254, "learning_rate": 7.421592327947691e-06, "loss": 17.6262, "step": 19635 }, { "epoch": 0.3589303014239494, "grad_norm": 8.56793241169269, "learning_rate": 7.421333345643208e-06, "loss": 17.9467, "step": 19636 }, { "epoch": 0.3589485806203959, "grad_norm": 6.218560703794342, "learning_rate": 7.4210743548521625e-06, "loss": 17.5763, "step": 19637 }, { "epoch": 0.35896685981684245, "grad_norm": 7.398152881594439, "learning_rate": 7.4208153555754615e-06, "loss": 17.7664, "step": 19638 }, { "epoch": 0.358985139013289, "grad_norm": 8.148209656499118, "learning_rate": 7.420556347814012e-06, "loss": 18.0681, "step": 19639 }, { "epoch": 0.3590034182097355, "grad_norm": 5.627743029720429, "learning_rate": 7.420297331568723e-06, "loss": 17.3774, "step": 19640 }, { "epoch": 0.359021697406182, "grad_norm": 7.478425081380414, "learning_rate": 7.420038306840503e-06, "loss": 18.0255, "step": 19641 }, { "epoch": 0.35903997660262854, "grad_norm": 6.609427342782146, "learning_rate": 7.419779273630256e-06, "loss": 17.5987, "step": 19642 }, { "epoch": 0.3590582557990751, "grad_norm": 7.193156920052047, "learning_rate": 7.419520231938895e-06, "loss": 17.4684, "step": 19643 }, { "epoch": 0.3590765349955216, "grad_norm": 5.748449793544554, "learning_rate": 7.419261181767326e-06, "loss": 17.293, "step": 19644 }, { "epoch": 0.35909481419196815, "grad_norm": 8.081670635816513, "learning_rate": 7.419002123116456e-06, "loss": 18.1125, "step": 19645 }, { "epoch": 0.35911309338841463, "grad_norm": 5.235679311443473, "learning_rate": 7.418743055987192e-06, "loss": 16.8445, "step": 19646 }, { "epoch": 0.35913137258486116, "grad_norm": 7.671628058202542, "learning_rate": 7.418483980380444e-06, "loss": 18.4453, "step": 19647 }, { "epoch": 0.3591496517813077, "grad_norm": 5.769573530475302, "learning_rate": 7.418224896297121e-06, "loss": 17.2692, "step": 19648 }, { "epoch": 0.35916793097775424, "grad_norm": 6.803533960951056, "learning_rate": 7.417965803738127e-06, "loss": 17.6479, "step": 19649 }, { "epoch": 0.3591862101742007, "grad_norm": 6.928022572209373, "learning_rate": 7.417706702704375e-06, "loss": 17.8475, "step": 19650 }, { "epoch": 0.35920448937064725, "grad_norm": 6.335909893000433, "learning_rate": 7.4174475931967705e-06, "loss": 17.6418, "step": 19651 }, { "epoch": 0.3592227685670938, "grad_norm": 9.00018364645467, "learning_rate": 7.417188475216222e-06, "loss": 17.9307, "step": 19652 }, { "epoch": 0.3592410477635403, "grad_norm": 5.782366549676033, "learning_rate": 7.416929348763636e-06, "loss": 17.0915, "step": 19653 }, { "epoch": 0.35925932695998686, "grad_norm": 7.1325422603754, "learning_rate": 7.416670213839924e-06, "loss": 17.5685, "step": 19654 }, { "epoch": 0.35927760615643334, "grad_norm": 6.932598907319226, "learning_rate": 7.416411070445992e-06, "loss": 17.8335, "step": 19655 }, { "epoch": 0.3592958853528799, "grad_norm": 7.340339314323664, "learning_rate": 7.416151918582748e-06, "loss": 17.9561, "step": 19656 }, { "epoch": 0.3593141645493264, "grad_norm": 7.09431420271154, "learning_rate": 7.415892758251102e-06, "loss": 17.8449, "step": 19657 }, { "epoch": 0.35933244374577294, "grad_norm": 6.842800698365112, "learning_rate": 7.415633589451963e-06, "loss": 17.6761, "step": 19658 }, { "epoch": 0.3593507229422195, "grad_norm": 6.4437469966055225, "learning_rate": 7.4153744121862356e-06, "loss": 17.432, "step": 19659 }, { "epoch": 0.35936900213866596, "grad_norm": 6.043227159248423, "learning_rate": 7.4151152264548325e-06, "loss": 17.367, "step": 19660 }, { "epoch": 0.3593872813351125, "grad_norm": 5.698312052736918, "learning_rate": 7.414856032258657e-06, "loss": 17.2209, "step": 19661 }, { "epoch": 0.35940556053155903, "grad_norm": 5.7025338404511485, "learning_rate": 7.414596829598624e-06, "loss": 17.1254, "step": 19662 }, { "epoch": 0.35942383972800557, "grad_norm": 7.415967652311182, "learning_rate": 7.414337618475638e-06, "loss": 17.956, "step": 19663 }, { "epoch": 0.3594421189244521, "grad_norm": 7.879265535384837, "learning_rate": 7.414078398890607e-06, "loss": 18.2849, "step": 19664 }, { "epoch": 0.3594603981208986, "grad_norm": 6.156486612932872, "learning_rate": 7.413819170844441e-06, "loss": 17.5195, "step": 19665 }, { "epoch": 0.3594786773173451, "grad_norm": 6.833262405969314, "learning_rate": 7.4135599343380485e-06, "loss": 17.6161, "step": 19666 }, { "epoch": 0.35949695651379165, "grad_norm": 5.941007952575729, "learning_rate": 7.413300689372338e-06, "loss": 17.3599, "step": 19667 }, { "epoch": 0.3595152357102382, "grad_norm": 7.484804191904205, "learning_rate": 7.413041435948218e-06, "loss": 18.2024, "step": 19668 }, { "epoch": 0.3595335149066847, "grad_norm": 5.704438021847672, "learning_rate": 7.412782174066596e-06, "loss": 17.3, "step": 19669 }, { "epoch": 0.3595517941031312, "grad_norm": 6.185017125210529, "learning_rate": 7.412522903728383e-06, "loss": 17.2473, "step": 19670 }, { "epoch": 0.35957007329957774, "grad_norm": 5.671060004639427, "learning_rate": 7.412263624934486e-06, "loss": 17.3157, "step": 19671 }, { "epoch": 0.3595883524960243, "grad_norm": 6.884825009983396, "learning_rate": 7.412004337685817e-06, "loss": 17.9186, "step": 19672 }, { "epoch": 0.3596066316924708, "grad_norm": 7.112803035888674, "learning_rate": 7.411745041983279e-06, "loss": 17.7299, "step": 19673 }, { "epoch": 0.35962491088891735, "grad_norm": 7.537168009725052, "learning_rate": 7.411485737827784e-06, "loss": 17.6849, "step": 19674 }, { "epoch": 0.35964319008536383, "grad_norm": 8.076727056397893, "learning_rate": 7.411226425220241e-06, "loss": 17.8502, "step": 19675 }, { "epoch": 0.35966146928181036, "grad_norm": 7.422706306015047, "learning_rate": 7.41096710416156e-06, "loss": 17.7832, "step": 19676 }, { "epoch": 0.3596797484782569, "grad_norm": 6.264827269575593, "learning_rate": 7.410707774652648e-06, "loss": 17.4166, "step": 19677 }, { "epoch": 0.35969802767470344, "grad_norm": 6.39577954566566, "learning_rate": 7.410448436694413e-06, "loss": 17.6863, "step": 19678 }, { "epoch": 0.35971630687114997, "grad_norm": 6.806574691474655, "learning_rate": 7.410189090287767e-06, "loss": 17.4876, "step": 19679 }, { "epoch": 0.35973458606759645, "grad_norm": 5.805553498268519, "learning_rate": 7.4099297354336165e-06, "loss": 17.1755, "step": 19680 }, { "epoch": 0.359752865264043, "grad_norm": 6.210606853116422, "learning_rate": 7.4096703721328724e-06, "loss": 17.4134, "step": 19681 }, { "epoch": 0.3597711444604895, "grad_norm": 7.514337354941543, "learning_rate": 7.4094110003864425e-06, "loss": 18.1328, "step": 19682 }, { "epoch": 0.35978942365693606, "grad_norm": 7.110195460046373, "learning_rate": 7.409151620195234e-06, "loss": 17.6775, "step": 19683 }, { "epoch": 0.35980770285338254, "grad_norm": 6.409499865083177, "learning_rate": 7.4088922315601605e-06, "loss": 17.4388, "step": 19684 }, { "epoch": 0.3598259820498291, "grad_norm": 6.406989787801191, "learning_rate": 7.408632834482128e-06, "loss": 17.4169, "step": 19685 }, { "epoch": 0.3598442612462756, "grad_norm": 8.533000130720591, "learning_rate": 7.408373428962048e-06, "loss": 17.9988, "step": 19686 }, { "epoch": 0.35986254044272215, "grad_norm": 7.258823821723709, "learning_rate": 7.408114015000826e-06, "loss": 18.2898, "step": 19687 }, { "epoch": 0.3598808196391687, "grad_norm": 6.27707598990602, "learning_rate": 7.407854592599373e-06, "loss": 17.4093, "step": 19688 }, { "epoch": 0.35989909883561516, "grad_norm": 6.565035756992462, "learning_rate": 7.407595161758601e-06, "loss": 17.8111, "step": 19689 }, { "epoch": 0.3599173780320617, "grad_norm": 7.039274284803448, "learning_rate": 7.407335722479415e-06, "loss": 17.7455, "step": 19690 }, { "epoch": 0.35993565722850823, "grad_norm": 6.696144712301817, "learning_rate": 7.407076274762727e-06, "loss": 17.4365, "step": 19691 }, { "epoch": 0.35995393642495477, "grad_norm": 6.206007733573061, "learning_rate": 7.406816818609445e-06, "loss": 17.5794, "step": 19692 }, { "epoch": 0.3599722156214013, "grad_norm": 8.493781781946268, "learning_rate": 7.406557354020478e-06, "loss": 18.0151, "step": 19693 }, { "epoch": 0.3599904948178478, "grad_norm": 7.626843398616922, "learning_rate": 7.406297880996738e-06, "loss": 18.0236, "step": 19694 }, { "epoch": 0.3600087740142943, "grad_norm": 6.942237055756598, "learning_rate": 7.406038399539133e-06, "loss": 17.555, "step": 19695 }, { "epoch": 0.36002705321074085, "grad_norm": 6.615219864669865, "learning_rate": 7.405778909648571e-06, "loss": 17.548, "step": 19696 }, { "epoch": 0.3600453324071874, "grad_norm": 6.089896410559852, "learning_rate": 7.405519411325962e-06, "loss": 17.6716, "step": 19697 }, { "epoch": 0.3600636116036339, "grad_norm": 5.7815032990880715, "learning_rate": 7.405259904572218e-06, "loss": 17.4445, "step": 19698 }, { "epoch": 0.3600818908000804, "grad_norm": 7.686329575038699, "learning_rate": 7.405000389388246e-06, "loss": 17.9146, "step": 19699 }, { "epoch": 0.36010016999652694, "grad_norm": 9.216626247454519, "learning_rate": 7.404740865774956e-06, "loss": 18.9646, "step": 19700 }, { "epoch": 0.3601184491929735, "grad_norm": 6.141974855088552, "learning_rate": 7.404481333733258e-06, "loss": 17.4394, "step": 19701 }, { "epoch": 0.36013672838942, "grad_norm": 6.143262419367804, "learning_rate": 7.404221793264062e-06, "loss": 17.4181, "step": 19702 }, { "epoch": 0.36015500758586655, "grad_norm": 5.082826702009226, "learning_rate": 7.403962244368278e-06, "loss": 16.8467, "step": 19703 }, { "epoch": 0.36017328678231303, "grad_norm": 7.543973184689073, "learning_rate": 7.403702687046813e-06, "loss": 18.0566, "step": 19704 }, { "epoch": 0.36019156597875956, "grad_norm": 6.4485019873172496, "learning_rate": 7.40344312130058e-06, "loss": 17.5813, "step": 19705 }, { "epoch": 0.3602098451752061, "grad_norm": 6.611479357754433, "learning_rate": 7.4031835471304865e-06, "loss": 18.2218, "step": 19706 }, { "epoch": 0.36022812437165264, "grad_norm": 6.891696190007479, "learning_rate": 7.402923964537444e-06, "loss": 17.6386, "step": 19707 }, { "epoch": 0.36024640356809917, "grad_norm": 6.611510491468658, "learning_rate": 7.4026643735223615e-06, "loss": 17.9912, "step": 19708 }, { "epoch": 0.36026468276454565, "grad_norm": 5.8347357933883925, "learning_rate": 7.402404774086149e-06, "loss": 17.4233, "step": 19709 }, { "epoch": 0.3602829619609922, "grad_norm": 5.884081020716994, "learning_rate": 7.402145166229715e-06, "loss": 17.4612, "step": 19710 }, { "epoch": 0.3603012411574387, "grad_norm": 4.938148253713487, "learning_rate": 7.401885549953972e-06, "loss": 16.6768, "step": 19711 }, { "epoch": 0.36031952035388526, "grad_norm": 6.006304997933859, "learning_rate": 7.401625925259828e-06, "loss": 17.3801, "step": 19712 }, { "epoch": 0.3603377995503318, "grad_norm": 5.800407938715674, "learning_rate": 7.401366292148195e-06, "loss": 17.4753, "step": 19713 }, { "epoch": 0.3603560787467783, "grad_norm": 6.698856927273207, "learning_rate": 7.40110665061998e-06, "loss": 17.4803, "step": 19714 }, { "epoch": 0.3603743579432248, "grad_norm": 5.772585813016475, "learning_rate": 7.400847000676094e-06, "loss": 17.2274, "step": 19715 }, { "epoch": 0.36039263713967135, "grad_norm": 6.647914607446682, "learning_rate": 7.400587342317448e-06, "loss": 17.5091, "step": 19716 }, { "epoch": 0.3604109163361179, "grad_norm": 8.63686879543823, "learning_rate": 7.400327675544953e-06, "loss": 18.5806, "step": 19717 }, { "epoch": 0.36042919553256436, "grad_norm": 7.582196247257612, "learning_rate": 7.400068000359517e-06, "loss": 18.1169, "step": 19718 }, { "epoch": 0.3604474747290109, "grad_norm": 6.340555182234325, "learning_rate": 7.39980831676205e-06, "loss": 17.6335, "step": 19719 }, { "epoch": 0.36046575392545743, "grad_norm": 6.6482684547007675, "learning_rate": 7.399548624753464e-06, "loss": 17.6152, "step": 19720 }, { "epoch": 0.36048403312190397, "grad_norm": 5.909967438297958, "learning_rate": 7.399288924334669e-06, "loss": 17.4626, "step": 19721 }, { "epoch": 0.3605023123183505, "grad_norm": 6.005151272205654, "learning_rate": 7.399029215506573e-06, "loss": 17.4018, "step": 19722 }, { "epoch": 0.360520591514797, "grad_norm": 7.12043830715943, "learning_rate": 7.398769498270089e-06, "loss": 17.5324, "step": 19723 }, { "epoch": 0.3605388707112435, "grad_norm": 5.024518202423369, "learning_rate": 7.398509772626125e-06, "loss": 16.9543, "step": 19724 }, { "epoch": 0.36055714990769006, "grad_norm": 6.777717028682303, "learning_rate": 7.398250038575592e-06, "loss": 17.6355, "step": 19725 }, { "epoch": 0.3605754291041366, "grad_norm": 6.549358447587129, "learning_rate": 7.397990296119402e-06, "loss": 17.546, "step": 19726 }, { "epoch": 0.3605937083005831, "grad_norm": 6.683853120093345, "learning_rate": 7.397730545258465e-06, "loss": 17.6049, "step": 19727 }, { "epoch": 0.3606119874970296, "grad_norm": 6.851753428347234, "learning_rate": 7.3974707859936875e-06, "loss": 17.3464, "step": 19728 }, { "epoch": 0.36063026669347614, "grad_norm": 4.364639334883509, "learning_rate": 7.397211018325985e-06, "loss": 16.718, "step": 19729 }, { "epoch": 0.3606485458899227, "grad_norm": 7.298185805244727, "learning_rate": 7.396951242256265e-06, "loss": 18.0385, "step": 19730 }, { "epoch": 0.3606668250863692, "grad_norm": 6.818757348557367, "learning_rate": 7.396691457785441e-06, "loss": 17.7227, "step": 19731 }, { "epoch": 0.36068510428281575, "grad_norm": 6.243456883073716, "learning_rate": 7.39643166491442e-06, "loss": 17.4463, "step": 19732 }, { "epoch": 0.36070338347926223, "grad_norm": 7.381302393938972, "learning_rate": 7.396171863644112e-06, "loss": 17.7082, "step": 19733 }, { "epoch": 0.36072166267570877, "grad_norm": 7.6168563506880975, "learning_rate": 7.395912053975432e-06, "loss": 18.3822, "step": 19734 }, { "epoch": 0.3607399418721553, "grad_norm": 7.06514164437329, "learning_rate": 7.395652235909287e-06, "loss": 17.9721, "step": 19735 }, { "epoch": 0.36075822106860184, "grad_norm": 6.251879282186458, "learning_rate": 7.39539240944659e-06, "loss": 17.3321, "step": 19736 }, { "epoch": 0.36077650026504837, "grad_norm": 8.067429593127484, "learning_rate": 7.3951325745882495e-06, "loss": 17.7748, "step": 19737 }, { "epoch": 0.36079477946149485, "grad_norm": 6.340137944056408, "learning_rate": 7.394872731335177e-06, "loss": 17.0545, "step": 19738 }, { "epoch": 0.3608130586579414, "grad_norm": 7.021389629665991, "learning_rate": 7.394612879688285e-06, "loss": 17.594, "step": 19739 }, { "epoch": 0.3608313378543879, "grad_norm": 6.903716597008266, "learning_rate": 7.3943530196484815e-06, "loss": 17.6848, "step": 19740 }, { "epoch": 0.36084961705083446, "grad_norm": 7.48213399992343, "learning_rate": 7.394093151216679e-06, "loss": 18.2631, "step": 19741 }, { "epoch": 0.360867896247281, "grad_norm": 5.734938813390531, "learning_rate": 7.393833274393786e-06, "loss": 17.0842, "step": 19742 }, { "epoch": 0.3608861754437275, "grad_norm": 6.20768880816114, "learning_rate": 7.393573389180716e-06, "loss": 17.5858, "step": 19743 }, { "epoch": 0.360904454640174, "grad_norm": 6.778458834230044, "learning_rate": 7.393313495578381e-06, "loss": 17.7546, "step": 19744 }, { "epoch": 0.36092273383662055, "grad_norm": 5.675230261919206, "learning_rate": 7.393053593587688e-06, "loss": 17.2096, "step": 19745 }, { "epoch": 0.3609410130330671, "grad_norm": 5.775884309642294, "learning_rate": 7.392793683209549e-06, "loss": 17.075, "step": 19746 }, { "epoch": 0.3609592922295136, "grad_norm": 6.958690337990445, "learning_rate": 7.392533764444876e-06, "loss": 17.7834, "step": 19747 }, { "epoch": 0.3609775714259601, "grad_norm": 7.253467290962211, "learning_rate": 7.392273837294581e-06, "loss": 17.8554, "step": 19748 }, { "epoch": 0.36099585062240663, "grad_norm": 6.391119217486749, "learning_rate": 7.3920139017595735e-06, "loss": 17.5906, "step": 19749 }, { "epoch": 0.36101412981885317, "grad_norm": 7.451194477337203, "learning_rate": 7.391753957840765e-06, "loss": 17.5156, "step": 19750 }, { "epoch": 0.3610324090152997, "grad_norm": 7.478802256143311, "learning_rate": 7.391494005539066e-06, "loss": 17.6013, "step": 19751 }, { "epoch": 0.3610506882117462, "grad_norm": 6.444095772568898, "learning_rate": 7.391234044855388e-06, "loss": 17.4123, "step": 19752 }, { "epoch": 0.3610689674081927, "grad_norm": 6.233308545837031, "learning_rate": 7.390974075790643e-06, "loss": 17.3002, "step": 19753 }, { "epoch": 0.36108724660463926, "grad_norm": 6.2841775562864495, "learning_rate": 7.390714098345739e-06, "loss": 17.6364, "step": 19754 }, { "epoch": 0.3611055258010858, "grad_norm": 6.062349416057556, "learning_rate": 7.390454112521592e-06, "loss": 17.3652, "step": 19755 }, { "epoch": 0.3611238049975323, "grad_norm": 6.675362618049272, "learning_rate": 7.3901941183191095e-06, "loss": 17.7102, "step": 19756 }, { "epoch": 0.3611420841939788, "grad_norm": 5.779234254884737, "learning_rate": 7.389934115739204e-06, "loss": 17.1497, "step": 19757 }, { "epoch": 0.36116036339042534, "grad_norm": 6.523897847285966, "learning_rate": 7.389674104782789e-06, "loss": 17.7741, "step": 19758 }, { "epoch": 0.3611786425868719, "grad_norm": 6.619671923116873, "learning_rate": 7.389414085450772e-06, "loss": 17.6544, "step": 19759 }, { "epoch": 0.3611969217833184, "grad_norm": 7.02593384430311, "learning_rate": 7.389154057744066e-06, "loss": 17.8876, "step": 19760 }, { "epoch": 0.36121520097976495, "grad_norm": 7.397148017211546, "learning_rate": 7.388894021663581e-06, "loss": 17.9427, "step": 19761 }, { "epoch": 0.36123348017621143, "grad_norm": 6.055158587464948, "learning_rate": 7.388633977210231e-06, "loss": 17.3072, "step": 19762 }, { "epoch": 0.36125175937265797, "grad_norm": 5.619037949473037, "learning_rate": 7.388373924384926e-06, "loss": 17.247, "step": 19763 }, { "epoch": 0.3612700385691045, "grad_norm": 6.593007971613619, "learning_rate": 7.388113863188579e-06, "loss": 17.7705, "step": 19764 }, { "epoch": 0.36128831776555104, "grad_norm": 6.80874070597616, "learning_rate": 7.387853793622099e-06, "loss": 17.6813, "step": 19765 }, { "epoch": 0.3613065969619976, "grad_norm": 8.362611850273602, "learning_rate": 7.387593715686399e-06, "loss": 18.5448, "step": 19766 }, { "epoch": 0.36132487615844405, "grad_norm": 6.786788258346048, "learning_rate": 7.387333629382388e-06, "loss": 17.598, "step": 19767 }, { "epoch": 0.3613431553548906, "grad_norm": 5.684770298024673, "learning_rate": 7.387073534710982e-06, "loss": 17.2738, "step": 19768 }, { "epoch": 0.3613614345513371, "grad_norm": 6.338173972597751, "learning_rate": 7.38681343167309e-06, "loss": 17.5468, "step": 19769 }, { "epoch": 0.36137971374778366, "grad_norm": 8.451045048153572, "learning_rate": 7.386553320269625e-06, "loss": 18.3305, "step": 19770 }, { "epoch": 0.3613979929442302, "grad_norm": 7.1551800330201125, "learning_rate": 7.386293200501495e-06, "loss": 17.7602, "step": 19771 }, { "epoch": 0.3614162721406767, "grad_norm": 6.1481840629691025, "learning_rate": 7.386033072369619e-06, "loss": 17.4353, "step": 19772 }, { "epoch": 0.3614345513371232, "grad_norm": 8.949106858349216, "learning_rate": 7.3857729358749e-06, "loss": 17.7564, "step": 19773 }, { "epoch": 0.36145283053356975, "grad_norm": 7.304820160434771, "learning_rate": 7.385512791018255e-06, "loss": 17.9803, "step": 19774 }, { "epoch": 0.3614711097300163, "grad_norm": 7.3603973195622086, "learning_rate": 7.3852526378005955e-06, "loss": 18.0031, "step": 19775 }, { "epoch": 0.3614893889264628, "grad_norm": 5.999500649438277, "learning_rate": 7.384992476222832e-06, "loss": 17.3192, "step": 19776 }, { "epoch": 0.3615076681229093, "grad_norm": 7.537500465756095, "learning_rate": 7.384732306285875e-06, "loss": 17.9028, "step": 19777 }, { "epoch": 0.36152594731935583, "grad_norm": 6.726759715914112, "learning_rate": 7.384472127990641e-06, "loss": 17.7602, "step": 19778 }, { "epoch": 0.36154422651580237, "grad_norm": 6.548891995168657, "learning_rate": 7.384211941338038e-06, "loss": 17.719, "step": 19779 }, { "epoch": 0.3615625057122489, "grad_norm": 5.862858606696997, "learning_rate": 7.383951746328979e-06, "loss": 17.2742, "step": 19780 }, { "epoch": 0.36158078490869544, "grad_norm": 6.781399042218442, "learning_rate": 7.383691542964376e-06, "loss": 18.1223, "step": 19781 }, { "epoch": 0.3615990641051419, "grad_norm": 6.087743190092235, "learning_rate": 7.383431331245142e-06, "loss": 17.3174, "step": 19782 }, { "epoch": 0.36161734330158846, "grad_norm": 6.2910179355940254, "learning_rate": 7.383171111172186e-06, "loss": 17.2643, "step": 19783 }, { "epoch": 0.361635622498035, "grad_norm": 6.218268411467764, "learning_rate": 7.382910882746424e-06, "loss": 17.3961, "step": 19784 }, { "epoch": 0.3616539016944815, "grad_norm": 5.730710339353957, "learning_rate": 7.382650645968764e-06, "loss": 16.9912, "step": 19785 }, { "epoch": 0.361672180890928, "grad_norm": 6.022333613509842, "learning_rate": 7.382390400840123e-06, "loss": 17.1482, "step": 19786 }, { "epoch": 0.36169046008737454, "grad_norm": 7.196930519841331, "learning_rate": 7.382130147361408e-06, "loss": 17.9276, "step": 19787 }, { "epoch": 0.3617087392838211, "grad_norm": 6.0683435637162395, "learning_rate": 7.381869885533534e-06, "loss": 17.3092, "step": 19788 }, { "epoch": 0.3617270184802676, "grad_norm": 7.343149765349085, "learning_rate": 7.381609615357414e-06, "loss": 18.0614, "step": 19789 }, { "epoch": 0.36174529767671415, "grad_norm": 5.5653076492380675, "learning_rate": 7.381349336833958e-06, "loss": 17.3303, "step": 19790 }, { "epoch": 0.36176357687316063, "grad_norm": 5.080857582127793, "learning_rate": 7.381089049964078e-06, "loss": 17.013, "step": 19791 }, { "epoch": 0.36178185606960717, "grad_norm": 7.169992833963652, "learning_rate": 7.38082875474869e-06, "loss": 17.5835, "step": 19792 }, { "epoch": 0.3618001352660537, "grad_norm": 6.034150606404791, "learning_rate": 7.380568451188702e-06, "loss": 17.4684, "step": 19793 }, { "epoch": 0.36181841446250024, "grad_norm": 7.196883464812891, "learning_rate": 7.3803081392850286e-06, "loss": 17.6147, "step": 19794 }, { "epoch": 0.3618366936589468, "grad_norm": 7.120685145067033, "learning_rate": 7.380047819038583e-06, "loss": 17.9046, "step": 19795 }, { "epoch": 0.36185497285539325, "grad_norm": 5.37868830159824, "learning_rate": 7.3797874904502744e-06, "loss": 17.0191, "step": 19796 }, { "epoch": 0.3618732520518398, "grad_norm": 7.297146558207694, "learning_rate": 7.3795271535210175e-06, "loss": 17.8612, "step": 19797 }, { "epoch": 0.3618915312482863, "grad_norm": 6.835203495703539, "learning_rate": 7.379266808251725e-06, "loss": 17.6131, "step": 19798 }, { "epoch": 0.36190981044473286, "grad_norm": 6.051211154481783, "learning_rate": 7.3790064546433096e-06, "loss": 17.3336, "step": 19799 }, { "epoch": 0.3619280896411794, "grad_norm": 9.885438003116352, "learning_rate": 7.378746092696682e-06, "loss": 17.6307, "step": 19800 }, { "epoch": 0.3619463688376259, "grad_norm": 9.13616426789303, "learning_rate": 7.378485722412756e-06, "loss": 17.9851, "step": 19801 }, { "epoch": 0.3619646480340724, "grad_norm": 6.680500048478998, "learning_rate": 7.3782253437924434e-06, "loss": 17.4634, "step": 19802 }, { "epoch": 0.36198292723051895, "grad_norm": 7.965927476562711, "learning_rate": 7.377964956836658e-06, "loss": 18.0622, "step": 19803 }, { "epoch": 0.3620012064269655, "grad_norm": 6.075774584032624, "learning_rate": 7.377704561546311e-06, "loss": 17.4991, "step": 19804 }, { "epoch": 0.362019485623412, "grad_norm": 5.394030899585471, "learning_rate": 7.377444157922318e-06, "loss": 17.1865, "step": 19805 }, { "epoch": 0.3620377648198585, "grad_norm": 6.494119032171042, "learning_rate": 7.377183745965587e-06, "loss": 17.2804, "step": 19806 }, { "epoch": 0.36205604401630503, "grad_norm": 6.971741388121176, "learning_rate": 7.3769233256770346e-06, "loss": 17.7178, "step": 19807 }, { "epoch": 0.36207432321275157, "grad_norm": 7.57425777775483, "learning_rate": 7.3766628970575716e-06, "loss": 17.7694, "step": 19808 }, { "epoch": 0.3620926024091981, "grad_norm": 6.448874817621395, "learning_rate": 7.3764024601081105e-06, "loss": 17.3395, "step": 19809 }, { "epoch": 0.36211088160564464, "grad_norm": 7.60523362352674, "learning_rate": 7.376142014829566e-06, "loss": 17.8962, "step": 19810 }, { "epoch": 0.3621291608020911, "grad_norm": 6.5349476392425885, "learning_rate": 7.3758815612228505e-06, "loss": 17.5655, "step": 19811 }, { "epoch": 0.36214743999853766, "grad_norm": 6.597569027456001, "learning_rate": 7.375621099288875e-06, "loss": 17.5343, "step": 19812 }, { "epoch": 0.3621657191949842, "grad_norm": 8.332972762924195, "learning_rate": 7.375360629028556e-06, "loss": 18.2068, "step": 19813 }, { "epoch": 0.36218399839143073, "grad_norm": 6.058549509102406, "learning_rate": 7.375100150442802e-06, "loss": 17.3782, "step": 19814 }, { "epoch": 0.36220227758787726, "grad_norm": 6.656909869088897, "learning_rate": 7.3748396635325284e-06, "loss": 17.7636, "step": 19815 }, { "epoch": 0.36222055678432374, "grad_norm": 8.63263441180892, "learning_rate": 7.374579168298648e-06, "loss": 18.1313, "step": 19816 }, { "epoch": 0.3622388359807703, "grad_norm": 6.344480129066718, "learning_rate": 7.374318664742075e-06, "loss": 17.5452, "step": 19817 }, { "epoch": 0.3622571151772168, "grad_norm": 8.058106029602351, "learning_rate": 7.374058152863719e-06, "loss": 18.2145, "step": 19818 }, { "epoch": 0.36227539437366335, "grad_norm": 7.686560264536356, "learning_rate": 7.373797632664497e-06, "loss": 18.156, "step": 19819 }, { "epoch": 0.36229367357010983, "grad_norm": 6.453915937680067, "learning_rate": 7.373537104145318e-06, "loss": 17.696, "step": 19820 }, { "epoch": 0.36231195276655637, "grad_norm": 6.338557925126497, "learning_rate": 7.373276567307099e-06, "loss": 17.6531, "step": 19821 }, { "epoch": 0.3623302319630029, "grad_norm": 7.44949599558676, "learning_rate": 7.373016022150752e-06, "loss": 17.9402, "step": 19822 }, { "epoch": 0.36234851115944944, "grad_norm": 5.511826999001526, "learning_rate": 7.372755468677188e-06, "loss": 17.1546, "step": 19823 }, { "epoch": 0.362366790355896, "grad_norm": 6.730621061072508, "learning_rate": 7.372494906887324e-06, "loss": 17.5534, "step": 19824 }, { "epoch": 0.36238506955234245, "grad_norm": 6.597743266117303, "learning_rate": 7.37223433678207e-06, "loss": 17.886, "step": 19825 }, { "epoch": 0.362403348748789, "grad_norm": 6.424877650774905, "learning_rate": 7.371973758362341e-06, "loss": 17.7819, "step": 19826 }, { "epoch": 0.3624216279452355, "grad_norm": 6.081115763625361, "learning_rate": 7.371713171629051e-06, "loss": 17.4009, "step": 19827 }, { "epoch": 0.36243990714168206, "grad_norm": 6.027815519647787, "learning_rate": 7.371452576583109e-06, "loss": 17.4071, "step": 19828 }, { "epoch": 0.3624581863381286, "grad_norm": 6.908653643836321, "learning_rate": 7.371191973225433e-06, "loss": 18.0503, "step": 19829 }, { "epoch": 0.3624764655345751, "grad_norm": 9.657029465211973, "learning_rate": 7.370931361556936e-06, "loss": 18.0939, "step": 19830 }, { "epoch": 0.3624947447310216, "grad_norm": 6.602626637139704, "learning_rate": 7.37067074157853e-06, "loss": 17.6102, "step": 19831 }, { "epoch": 0.36251302392746815, "grad_norm": 5.74620240840169, "learning_rate": 7.370410113291129e-06, "loss": 17.0892, "step": 19832 }, { "epoch": 0.3625313031239147, "grad_norm": 5.600716101090447, "learning_rate": 7.370149476695644e-06, "loss": 17.1531, "step": 19833 }, { "epoch": 0.3625495823203612, "grad_norm": 6.647988825642275, "learning_rate": 7.3698888317929924e-06, "loss": 17.6309, "step": 19834 }, { "epoch": 0.3625678615168077, "grad_norm": 7.462942426129583, "learning_rate": 7.369628178584087e-06, "loss": 18.1157, "step": 19835 }, { "epoch": 0.36258614071325423, "grad_norm": 6.950999798039686, "learning_rate": 7.369367517069839e-06, "loss": 17.818, "step": 19836 }, { "epoch": 0.36260441990970077, "grad_norm": 5.928236702927465, "learning_rate": 7.369106847251164e-06, "loss": 17.324, "step": 19837 }, { "epoch": 0.3626226991061473, "grad_norm": 8.025803946220389, "learning_rate": 7.368846169128975e-06, "loss": 17.9005, "step": 19838 }, { "epoch": 0.36264097830259384, "grad_norm": 7.188681587313006, "learning_rate": 7.368585482704186e-06, "loss": 17.7076, "step": 19839 }, { "epoch": 0.3626592574990403, "grad_norm": 5.946788418432133, "learning_rate": 7.3683247879777094e-06, "loss": 17.3071, "step": 19840 }, { "epoch": 0.36267753669548686, "grad_norm": 8.783183882784503, "learning_rate": 7.368064084950461e-06, "loss": 18.1716, "step": 19841 }, { "epoch": 0.3626958158919334, "grad_norm": 8.32619851133781, "learning_rate": 7.367803373623352e-06, "loss": 18.4285, "step": 19842 }, { "epoch": 0.36271409508837993, "grad_norm": 5.768828605822886, "learning_rate": 7.3675426539973e-06, "loss": 17.2132, "step": 19843 }, { "epoch": 0.36273237428482646, "grad_norm": 7.295614907201202, "learning_rate": 7.3672819260732155e-06, "loss": 17.9062, "step": 19844 }, { "epoch": 0.36275065348127294, "grad_norm": 7.601003625095559, "learning_rate": 7.367021189852013e-06, "loss": 17.755, "step": 19845 }, { "epoch": 0.3627689326777195, "grad_norm": 7.25728850681366, "learning_rate": 7.366760445334607e-06, "loss": 17.588, "step": 19846 }, { "epoch": 0.362787211874166, "grad_norm": 7.435540056191759, "learning_rate": 7.366499692521909e-06, "loss": 17.9894, "step": 19847 }, { "epoch": 0.36280549107061255, "grad_norm": 6.575627358714186, "learning_rate": 7.3662389314148375e-06, "loss": 17.651, "step": 19848 }, { "epoch": 0.3628237702670591, "grad_norm": 6.107729087499006, "learning_rate": 7.3659781620143035e-06, "loss": 17.3608, "step": 19849 }, { "epoch": 0.36284204946350557, "grad_norm": 7.461839777289414, "learning_rate": 7.36571738432122e-06, "loss": 17.6875, "step": 19850 }, { "epoch": 0.3628603286599521, "grad_norm": 6.015353779103699, "learning_rate": 7.365456598336504e-06, "loss": 17.3833, "step": 19851 }, { "epoch": 0.36287860785639864, "grad_norm": 7.613494342322678, "learning_rate": 7.3651958040610675e-06, "loss": 18.0758, "step": 19852 }, { "epoch": 0.3628968870528452, "grad_norm": 8.27991365474283, "learning_rate": 7.364935001495823e-06, "loss": 18.2216, "step": 19853 }, { "epoch": 0.36291516624929165, "grad_norm": 7.206607014140489, "learning_rate": 7.364674190641688e-06, "loss": 17.7533, "step": 19854 }, { "epoch": 0.3629334454457382, "grad_norm": 6.2258669349276, "learning_rate": 7.3644133714995754e-06, "loss": 17.4918, "step": 19855 }, { "epoch": 0.3629517246421847, "grad_norm": 5.830931908101433, "learning_rate": 7.364152544070399e-06, "loss": 17.4619, "step": 19856 }, { "epoch": 0.36297000383863126, "grad_norm": 6.744025648768919, "learning_rate": 7.363891708355074e-06, "loss": 17.8524, "step": 19857 }, { "epoch": 0.3629882830350778, "grad_norm": 5.33544559027682, "learning_rate": 7.363630864354513e-06, "loss": 17.122, "step": 19858 }, { "epoch": 0.3630065622315243, "grad_norm": 7.120475255009561, "learning_rate": 7.363370012069631e-06, "loss": 17.8644, "step": 19859 }, { "epoch": 0.3630248414279708, "grad_norm": 6.415446205178347, "learning_rate": 7.36310915150134e-06, "loss": 17.4951, "step": 19860 }, { "epoch": 0.36304312062441735, "grad_norm": 5.1713900764061735, "learning_rate": 7.362848282650559e-06, "loss": 16.9743, "step": 19861 }, { "epoch": 0.3630613998208639, "grad_norm": 6.582510831952927, "learning_rate": 7.362587405518199e-06, "loss": 17.458, "step": 19862 }, { "epoch": 0.3630796790173104, "grad_norm": 7.14838707725858, "learning_rate": 7.3623265201051755e-06, "loss": 17.4343, "step": 19863 }, { "epoch": 0.3630979582137569, "grad_norm": 6.030890566137289, "learning_rate": 7.362065626412402e-06, "loss": 17.5821, "step": 19864 }, { "epoch": 0.36311623741020344, "grad_norm": 6.132200128666458, "learning_rate": 7.361804724440793e-06, "loss": 17.5141, "step": 19865 }, { "epoch": 0.36313451660664997, "grad_norm": 6.755722703473308, "learning_rate": 7.361543814191266e-06, "loss": 17.4266, "step": 19866 }, { "epoch": 0.3631527958030965, "grad_norm": 5.901362078986602, "learning_rate": 7.36128289566473e-06, "loss": 17.4349, "step": 19867 }, { "epoch": 0.36317107499954304, "grad_norm": 7.375293085928717, "learning_rate": 7.361021968862103e-06, "loss": 18.1263, "step": 19868 }, { "epoch": 0.3631893541959895, "grad_norm": 7.949065423837825, "learning_rate": 7.3607610337842995e-06, "loss": 18.0082, "step": 19869 }, { "epoch": 0.36320763339243606, "grad_norm": 7.529454143613692, "learning_rate": 7.360500090432232e-06, "loss": 18.2168, "step": 19870 }, { "epoch": 0.3632259125888826, "grad_norm": 5.57568043384929, "learning_rate": 7.360239138806818e-06, "loss": 17.1047, "step": 19871 }, { "epoch": 0.36324419178532913, "grad_norm": 5.331238942134828, "learning_rate": 7.359978178908972e-06, "loss": 16.9609, "step": 19872 }, { "epoch": 0.36326247098177566, "grad_norm": 5.665163282601551, "learning_rate": 7.359717210739605e-06, "loss": 17.2756, "step": 19873 }, { "epoch": 0.36328075017822214, "grad_norm": 6.414277189172481, "learning_rate": 7.3594562342996335e-06, "loss": 17.6562, "step": 19874 }, { "epoch": 0.3632990293746687, "grad_norm": 6.986932385311707, "learning_rate": 7.359195249589974e-06, "loss": 17.5632, "step": 19875 }, { "epoch": 0.3633173085711152, "grad_norm": 6.595780216320281, "learning_rate": 7.3589342566115406e-06, "loss": 17.6591, "step": 19876 }, { "epoch": 0.36333558776756175, "grad_norm": 9.75605735647986, "learning_rate": 7.358673255365245e-06, "loss": 19.0344, "step": 19877 }, { "epoch": 0.3633538669640083, "grad_norm": 9.233791121504725, "learning_rate": 7.358412245852007e-06, "loss": 18.404, "step": 19878 }, { "epoch": 0.36337214616045477, "grad_norm": 7.148687535834478, "learning_rate": 7.358151228072736e-06, "loss": 17.9507, "step": 19879 }, { "epoch": 0.3633904253569013, "grad_norm": 6.465007259481233, "learning_rate": 7.357890202028351e-06, "loss": 17.4709, "step": 19880 }, { "epoch": 0.36340870455334784, "grad_norm": 6.286347588352254, "learning_rate": 7.3576291677197655e-06, "loss": 17.3946, "step": 19881 }, { "epoch": 0.3634269837497944, "grad_norm": 4.960147903218949, "learning_rate": 7.357368125147895e-06, "loss": 16.9839, "step": 19882 }, { "epoch": 0.3634452629462409, "grad_norm": 7.9545658929064755, "learning_rate": 7.3571070743136515e-06, "loss": 18.2448, "step": 19883 }, { "epoch": 0.3634635421426874, "grad_norm": 5.9500038086809495, "learning_rate": 7.3568460152179545e-06, "loss": 17.5407, "step": 19884 }, { "epoch": 0.3634818213391339, "grad_norm": 8.391662365068228, "learning_rate": 7.356584947861716e-06, "loss": 18.2017, "step": 19885 }, { "epoch": 0.36350010053558046, "grad_norm": 8.43666846600439, "learning_rate": 7.356323872245852e-06, "loss": 18.4896, "step": 19886 }, { "epoch": 0.363518379732027, "grad_norm": 6.651142502786835, "learning_rate": 7.356062788371277e-06, "loss": 17.5591, "step": 19887 }, { "epoch": 0.3635366589284735, "grad_norm": 6.580105254687807, "learning_rate": 7.355801696238906e-06, "loss": 17.5632, "step": 19888 }, { "epoch": 0.36355493812492, "grad_norm": 6.534458383635875, "learning_rate": 7.3555405958496555e-06, "loss": 17.4418, "step": 19889 }, { "epoch": 0.36357321732136655, "grad_norm": 7.116823096185072, "learning_rate": 7.3552794872044385e-06, "loss": 17.8299, "step": 19890 }, { "epoch": 0.3635914965178131, "grad_norm": 6.319830041837158, "learning_rate": 7.355018370304172e-06, "loss": 17.7226, "step": 19891 }, { "epoch": 0.3636097757142596, "grad_norm": 5.650490274725987, "learning_rate": 7.35475724514977e-06, "loss": 17.1894, "step": 19892 }, { "epoch": 0.3636280549107061, "grad_norm": 6.31767404409286, "learning_rate": 7.354496111742149e-06, "loss": 17.6089, "step": 19893 }, { "epoch": 0.36364633410715264, "grad_norm": 7.319743799716542, "learning_rate": 7.354234970082223e-06, "loss": 17.6796, "step": 19894 }, { "epoch": 0.36366461330359917, "grad_norm": 7.574336442661001, "learning_rate": 7.353973820170906e-06, "loss": 17.582, "step": 19895 }, { "epoch": 0.3636828925000457, "grad_norm": 6.055400818121726, "learning_rate": 7.353712662009118e-06, "loss": 17.562, "step": 19896 }, { "epoch": 0.36370117169649224, "grad_norm": 5.941978445605599, "learning_rate": 7.35345149559777e-06, "loss": 17.2626, "step": 19897 }, { "epoch": 0.3637194508929387, "grad_norm": 5.331945161886824, "learning_rate": 7.353190320937778e-06, "loss": 17.1074, "step": 19898 }, { "epoch": 0.36373773008938526, "grad_norm": 7.415834593382804, "learning_rate": 7.352929138030059e-06, "loss": 18.073, "step": 19899 }, { "epoch": 0.3637560092858318, "grad_norm": 7.025386381980766, "learning_rate": 7.352667946875528e-06, "loss": 17.3585, "step": 19900 }, { "epoch": 0.36377428848227833, "grad_norm": 6.2390589600593245, "learning_rate": 7.352406747475098e-06, "loss": 17.484, "step": 19901 }, { "epoch": 0.36379256767872487, "grad_norm": 7.228486929245648, "learning_rate": 7.352145539829688e-06, "loss": 17.651, "step": 19902 }, { "epoch": 0.36381084687517135, "grad_norm": 5.342575900198373, "learning_rate": 7.351884323940212e-06, "loss": 17.0671, "step": 19903 }, { "epoch": 0.3638291260716179, "grad_norm": 6.952441270837234, "learning_rate": 7.351623099807587e-06, "loss": 17.7346, "step": 19904 }, { "epoch": 0.3638474052680644, "grad_norm": 5.874155239855435, "learning_rate": 7.351361867432725e-06, "loss": 17.4286, "step": 19905 }, { "epoch": 0.36386568446451095, "grad_norm": 6.257066167438005, "learning_rate": 7.351100626816544e-06, "loss": 17.5954, "step": 19906 }, { "epoch": 0.3638839636609575, "grad_norm": 6.795264780061543, "learning_rate": 7.350839377959959e-06, "loss": 17.6907, "step": 19907 }, { "epoch": 0.36390224285740397, "grad_norm": 10.194669551376359, "learning_rate": 7.350578120863887e-06, "loss": 18.3099, "step": 19908 }, { "epoch": 0.3639205220538505, "grad_norm": 5.402891267281715, "learning_rate": 7.350316855529243e-06, "loss": 17.2023, "step": 19909 }, { "epoch": 0.36393880125029704, "grad_norm": 6.010307659767309, "learning_rate": 7.35005558195694e-06, "loss": 17.5061, "step": 19910 }, { "epoch": 0.3639570804467436, "grad_norm": 5.967855881697756, "learning_rate": 7.3497943001478975e-06, "loss": 17.4228, "step": 19911 }, { "epoch": 0.3639753596431901, "grad_norm": 6.897902739331548, "learning_rate": 7.34953301010303e-06, "loss": 17.7083, "step": 19912 }, { "epoch": 0.3639936388396366, "grad_norm": 7.135653523181321, "learning_rate": 7.349271711823255e-06, "loss": 17.8317, "step": 19913 }, { "epoch": 0.3640119180360831, "grad_norm": 6.64145514082436, "learning_rate": 7.3490104053094845e-06, "loss": 17.4163, "step": 19914 }, { "epoch": 0.36403019723252966, "grad_norm": 6.376695234039627, "learning_rate": 7.348749090562636e-06, "loss": 17.5942, "step": 19915 }, { "epoch": 0.3640484764289762, "grad_norm": 4.790855748747493, "learning_rate": 7.348487767583625e-06, "loss": 16.8697, "step": 19916 }, { "epoch": 0.36406675562542273, "grad_norm": 8.041839296634185, "learning_rate": 7.348226436373371e-06, "loss": 17.9628, "step": 19917 }, { "epoch": 0.3640850348218692, "grad_norm": 7.822606379322255, "learning_rate": 7.347965096932785e-06, "loss": 18.3822, "step": 19918 }, { "epoch": 0.36410331401831575, "grad_norm": 5.845155756889289, "learning_rate": 7.347703749262787e-06, "loss": 17.2784, "step": 19919 }, { "epoch": 0.3641215932147623, "grad_norm": 6.453666345030061, "learning_rate": 7.3474423933642895e-06, "loss": 17.7771, "step": 19920 }, { "epoch": 0.3641398724112088, "grad_norm": 6.952256037911246, "learning_rate": 7.34718102923821e-06, "loss": 17.9244, "step": 19921 }, { "epoch": 0.3641581516076553, "grad_norm": 5.726640790921278, "learning_rate": 7.3469196568854654e-06, "loss": 17.2561, "step": 19922 }, { "epoch": 0.36417643080410184, "grad_norm": 6.158958516929366, "learning_rate": 7.346658276306971e-06, "loss": 17.3897, "step": 19923 }, { "epoch": 0.36419471000054837, "grad_norm": 6.7314552207787, "learning_rate": 7.3463968875036415e-06, "loss": 17.5779, "step": 19924 }, { "epoch": 0.3642129891969949, "grad_norm": 5.953983189035276, "learning_rate": 7.346135490476396e-06, "loss": 17.1681, "step": 19925 }, { "epoch": 0.36423126839344144, "grad_norm": 7.448260645280754, "learning_rate": 7.345874085226149e-06, "loss": 18.071, "step": 19926 }, { "epoch": 0.3642495475898879, "grad_norm": 7.135589902163268, "learning_rate": 7.3456126717538165e-06, "loss": 17.9099, "step": 19927 }, { "epoch": 0.36426782678633446, "grad_norm": 7.811948117017589, "learning_rate": 7.3453512500603155e-06, "loss": 18.1919, "step": 19928 }, { "epoch": 0.364286105982781, "grad_norm": 6.4819789518858775, "learning_rate": 7.345089820146561e-06, "loss": 17.391, "step": 19929 }, { "epoch": 0.36430438517922753, "grad_norm": 6.573790459434949, "learning_rate": 7.3448283820134714e-06, "loss": 17.4223, "step": 19930 }, { "epoch": 0.36432266437567407, "grad_norm": 7.142233545797336, "learning_rate": 7.344566935661963e-06, "loss": 17.9212, "step": 19931 }, { "epoch": 0.36434094357212055, "grad_norm": 7.815281670207444, "learning_rate": 7.344305481092948e-06, "loss": 18.5594, "step": 19932 }, { "epoch": 0.3643592227685671, "grad_norm": 6.389263015084949, "learning_rate": 7.344044018307347e-06, "loss": 17.6672, "step": 19933 }, { "epoch": 0.3643775019650136, "grad_norm": 6.363778951246994, "learning_rate": 7.3437825473060756e-06, "loss": 17.5988, "step": 19934 }, { "epoch": 0.36439578116146015, "grad_norm": 7.878011337387179, "learning_rate": 7.34352106809005e-06, "loss": 18.1643, "step": 19935 }, { "epoch": 0.3644140603579067, "grad_norm": 8.285066118286098, "learning_rate": 7.343259580660185e-06, "loss": 18.3539, "step": 19936 }, { "epoch": 0.36443233955435317, "grad_norm": 6.443267824682111, "learning_rate": 7.3429980850173985e-06, "loss": 17.3013, "step": 19937 }, { "epoch": 0.3644506187507997, "grad_norm": 6.650738875390846, "learning_rate": 7.342736581162608e-06, "loss": 17.6297, "step": 19938 }, { "epoch": 0.36446889794724624, "grad_norm": 8.323867892700868, "learning_rate": 7.342475069096729e-06, "loss": 18.2189, "step": 19939 }, { "epoch": 0.3644871771436928, "grad_norm": 8.029657663627717, "learning_rate": 7.342213548820678e-06, "loss": 17.9917, "step": 19940 }, { "epoch": 0.3645054563401393, "grad_norm": 5.902466295141075, "learning_rate": 7.3419520203353736e-06, "loss": 17.5369, "step": 19941 }, { "epoch": 0.3645237355365858, "grad_norm": 6.6950706084737, "learning_rate": 7.341690483641727e-06, "loss": 17.6478, "step": 19942 }, { "epoch": 0.3645420147330323, "grad_norm": 7.414295409257129, "learning_rate": 7.34142893874066e-06, "loss": 18.0049, "step": 19943 }, { "epoch": 0.36456029392947886, "grad_norm": 6.441432619775502, "learning_rate": 7.341167385633089e-06, "loss": 17.3596, "step": 19944 }, { "epoch": 0.3645785731259254, "grad_norm": 7.565466444403028, "learning_rate": 7.340905824319928e-06, "loss": 17.9493, "step": 19945 }, { "epoch": 0.36459685232237193, "grad_norm": 6.884218125613176, "learning_rate": 7.3406442548020965e-06, "loss": 17.9367, "step": 19946 }, { "epoch": 0.3646151315188184, "grad_norm": 8.205436379367864, "learning_rate": 7.340382677080509e-06, "loss": 18.2755, "step": 19947 }, { "epoch": 0.36463341071526495, "grad_norm": 7.479586387863585, "learning_rate": 7.3401210911560825e-06, "loss": 18.084, "step": 19948 }, { "epoch": 0.3646516899117115, "grad_norm": 5.456945197338324, "learning_rate": 7.3398594970297365e-06, "loss": 17.2475, "step": 19949 }, { "epoch": 0.364669969108158, "grad_norm": 7.155232689620144, "learning_rate": 7.339597894702385e-06, "loss": 18.057, "step": 19950 }, { "epoch": 0.36468824830460456, "grad_norm": 6.315182426164225, "learning_rate": 7.339336284174946e-06, "loss": 17.4557, "step": 19951 }, { "epoch": 0.36470652750105104, "grad_norm": 6.869441070444373, "learning_rate": 7.339074665448336e-06, "loss": 17.7521, "step": 19952 }, { "epoch": 0.36472480669749757, "grad_norm": 5.832337154707694, "learning_rate": 7.338813038523473e-06, "loss": 17.2121, "step": 19953 }, { "epoch": 0.3647430858939441, "grad_norm": 7.771324661108997, "learning_rate": 7.338551403401273e-06, "loss": 18.1004, "step": 19954 }, { "epoch": 0.36476136509039064, "grad_norm": 5.4355066147730575, "learning_rate": 7.338289760082653e-06, "loss": 17.1734, "step": 19955 }, { "epoch": 0.3647796442868371, "grad_norm": 6.067806744073409, "learning_rate": 7.33802810856853e-06, "loss": 17.3221, "step": 19956 }, { "epoch": 0.36479792348328366, "grad_norm": 5.627941981217168, "learning_rate": 7.337766448859822e-06, "loss": 17.2232, "step": 19957 }, { "epoch": 0.3648162026797302, "grad_norm": 7.304743637762822, "learning_rate": 7.337504780957446e-06, "loss": 17.9585, "step": 19958 }, { "epoch": 0.36483448187617673, "grad_norm": 6.371991764224059, "learning_rate": 7.337243104862317e-06, "loss": 17.4396, "step": 19959 }, { "epoch": 0.36485276107262327, "grad_norm": 7.272901108269467, "learning_rate": 7.336981420575354e-06, "loss": 17.7912, "step": 19960 }, { "epoch": 0.36487104026906975, "grad_norm": 6.709934676998213, "learning_rate": 7.336719728097475e-06, "loss": 17.6917, "step": 19961 }, { "epoch": 0.3648893194655163, "grad_norm": 7.241521777883362, "learning_rate": 7.336458027429596e-06, "loss": 17.9274, "step": 19962 }, { "epoch": 0.3649075986619628, "grad_norm": 6.401168922310908, "learning_rate": 7.336196318572635e-06, "loss": 17.6308, "step": 19963 }, { "epoch": 0.36492587785840935, "grad_norm": 5.128963487143801, "learning_rate": 7.335934601527507e-06, "loss": 17.2691, "step": 19964 }, { "epoch": 0.3649441570548559, "grad_norm": 6.057947574781268, "learning_rate": 7.335672876295131e-06, "loss": 17.4541, "step": 19965 }, { "epoch": 0.36496243625130237, "grad_norm": 8.199589475868523, "learning_rate": 7.335411142876425e-06, "loss": 17.8986, "step": 19966 }, { "epoch": 0.3649807154477489, "grad_norm": 5.152101468473128, "learning_rate": 7.335149401272306e-06, "loss": 16.9409, "step": 19967 }, { "epoch": 0.36499899464419544, "grad_norm": 7.64284625076132, "learning_rate": 7.33488765148369e-06, "loss": 17.8299, "step": 19968 }, { "epoch": 0.365017273840642, "grad_norm": 6.694127243468344, "learning_rate": 7.334625893511496e-06, "loss": 17.6618, "step": 19969 }, { "epoch": 0.3650355530370885, "grad_norm": 6.137374494704486, "learning_rate": 7.33436412735664e-06, "loss": 17.1838, "step": 19970 }, { "epoch": 0.365053832233535, "grad_norm": 6.492198993151892, "learning_rate": 7.334102353020041e-06, "loss": 17.8877, "step": 19971 }, { "epoch": 0.3650721114299815, "grad_norm": 5.802896208555929, "learning_rate": 7.333840570502616e-06, "loss": 17.2948, "step": 19972 }, { "epoch": 0.36509039062642806, "grad_norm": 7.132221431844595, "learning_rate": 7.333578779805282e-06, "loss": 17.6006, "step": 19973 }, { "epoch": 0.3651086698228746, "grad_norm": 6.093304534442225, "learning_rate": 7.333316980928956e-06, "loss": 17.3567, "step": 19974 }, { "epoch": 0.36512694901932113, "grad_norm": 6.436127130333897, "learning_rate": 7.333055173874558e-06, "loss": 17.4512, "step": 19975 }, { "epoch": 0.3651452282157676, "grad_norm": 6.954979280215868, "learning_rate": 7.332793358643003e-06, "loss": 17.6193, "step": 19976 }, { "epoch": 0.36516350741221415, "grad_norm": 5.556852294777728, "learning_rate": 7.33253153523521e-06, "loss": 17.1742, "step": 19977 }, { "epoch": 0.3651817866086607, "grad_norm": 6.905918436637122, "learning_rate": 7.332269703652098e-06, "loss": 17.6606, "step": 19978 }, { "epoch": 0.3652000658051072, "grad_norm": 7.1322970346039885, "learning_rate": 7.332007863894582e-06, "loss": 17.18, "step": 19979 }, { "epoch": 0.36521834500155376, "grad_norm": 6.8587204657493555, "learning_rate": 7.331746015963579e-06, "loss": 17.7386, "step": 19980 }, { "epoch": 0.36523662419800024, "grad_norm": 6.772290370521658, "learning_rate": 7.331484159860009e-06, "loss": 17.7342, "step": 19981 }, { "epoch": 0.3652549033944468, "grad_norm": 7.231616881412896, "learning_rate": 7.331222295584791e-06, "loss": 17.6909, "step": 19982 }, { "epoch": 0.3652731825908933, "grad_norm": 7.793296884804323, "learning_rate": 7.33096042313884e-06, "loss": 18.1183, "step": 19983 }, { "epoch": 0.36529146178733984, "grad_norm": 6.483927942294078, "learning_rate": 7.330698542523076e-06, "loss": 17.2938, "step": 19984 }, { "epoch": 0.3653097409837864, "grad_norm": 6.057767800886469, "learning_rate": 7.330436653738415e-06, "loss": 17.5713, "step": 19985 }, { "epoch": 0.36532802018023286, "grad_norm": 6.1906657356421215, "learning_rate": 7.330174756785777e-06, "loss": 17.2538, "step": 19986 }, { "epoch": 0.3653462993766794, "grad_norm": 7.278490927171888, "learning_rate": 7.329912851666077e-06, "loss": 17.6953, "step": 19987 }, { "epoch": 0.36536457857312593, "grad_norm": 5.9615259558238, "learning_rate": 7.329650938380235e-06, "loss": 17.3261, "step": 19988 }, { "epoch": 0.36538285776957247, "grad_norm": 5.550292146699225, "learning_rate": 7.329389016929169e-06, "loss": 17.1694, "step": 19989 }, { "epoch": 0.36540113696601895, "grad_norm": 6.412002416771385, "learning_rate": 7.3291270873137964e-06, "loss": 17.7491, "step": 19990 }, { "epoch": 0.3654194161624655, "grad_norm": 6.143702555565244, "learning_rate": 7.328865149535036e-06, "loss": 17.4818, "step": 19991 }, { "epoch": 0.365437695358912, "grad_norm": 7.844841567040807, "learning_rate": 7.328603203593805e-06, "loss": 17.7499, "step": 19992 }, { "epoch": 0.36545597455535855, "grad_norm": 8.015703335755985, "learning_rate": 7.328341249491022e-06, "loss": 17.7027, "step": 19993 }, { "epoch": 0.3654742537518051, "grad_norm": 6.263571151195266, "learning_rate": 7.3280792872276055e-06, "loss": 17.3522, "step": 19994 }, { "epoch": 0.36549253294825157, "grad_norm": 7.541199669687731, "learning_rate": 7.327817316804472e-06, "loss": 17.7727, "step": 19995 }, { "epoch": 0.3655108121446981, "grad_norm": 7.378806630387118, "learning_rate": 7.3275553382225405e-06, "loss": 17.955, "step": 19996 }, { "epoch": 0.36552909134114464, "grad_norm": 6.7678685546807085, "learning_rate": 7.32729335148273e-06, "loss": 17.4339, "step": 19997 }, { "epoch": 0.3655473705375912, "grad_norm": 7.12057927862782, "learning_rate": 7.327031356585959e-06, "loss": 17.3648, "step": 19998 }, { "epoch": 0.3655656497340377, "grad_norm": 8.71501300379711, "learning_rate": 7.326769353533144e-06, "loss": 17.7721, "step": 19999 }, { "epoch": 0.3655839289304842, "grad_norm": 7.299752176009435, "learning_rate": 7.326507342325206e-06, "loss": 18.0457, "step": 20000 }, { "epoch": 0.36560220812693073, "grad_norm": 7.3599804513870914, "learning_rate": 7.32624532296306e-06, "loss": 17.6514, "step": 20001 }, { "epoch": 0.36562048732337726, "grad_norm": 5.3486958687936745, "learning_rate": 7.325983295447626e-06, "loss": 16.9642, "step": 20002 }, { "epoch": 0.3656387665198238, "grad_norm": 6.557372272729643, "learning_rate": 7.325721259779824e-06, "loss": 17.619, "step": 20003 }, { "epoch": 0.36565704571627033, "grad_norm": 5.416973195391767, "learning_rate": 7.325459215960569e-06, "loss": 17.2945, "step": 20004 }, { "epoch": 0.3656753249127168, "grad_norm": 5.989822328414677, "learning_rate": 7.325197163990783e-06, "loss": 17.1684, "step": 20005 }, { "epoch": 0.36569360410916335, "grad_norm": 6.011230896835203, "learning_rate": 7.32493510387138e-06, "loss": 17.2764, "step": 20006 }, { "epoch": 0.3657118833056099, "grad_norm": 6.343896300084955, "learning_rate": 7.324673035603283e-06, "loss": 17.2743, "step": 20007 }, { "epoch": 0.3657301625020564, "grad_norm": 8.424063219074782, "learning_rate": 7.324410959187409e-06, "loss": 18.5565, "step": 20008 }, { "epoch": 0.36574844169850296, "grad_norm": 5.454175634280242, "learning_rate": 7.324148874624676e-06, "loss": 17.1443, "step": 20009 }, { "epoch": 0.36576672089494944, "grad_norm": 6.962785575902715, "learning_rate": 7.323886781916003e-06, "loss": 17.6528, "step": 20010 }, { "epoch": 0.365785000091396, "grad_norm": 6.398342119715435, "learning_rate": 7.323624681062308e-06, "loss": 17.469, "step": 20011 }, { "epoch": 0.3658032792878425, "grad_norm": 6.982471194952962, "learning_rate": 7.323362572064509e-06, "loss": 17.7279, "step": 20012 }, { "epoch": 0.36582155848428904, "grad_norm": 5.722768016888922, "learning_rate": 7.323100454923529e-06, "loss": 17.3035, "step": 20013 }, { "epoch": 0.3658398376807356, "grad_norm": 6.207848962254209, "learning_rate": 7.322838329640281e-06, "loss": 17.4449, "step": 20014 }, { "epoch": 0.36585811687718206, "grad_norm": 5.52371366053853, "learning_rate": 7.322576196215687e-06, "loss": 17.0434, "step": 20015 }, { "epoch": 0.3658763960736286, "grad_norm": 6.447198465491182, "learning_rate": 7.322314054650664e-06, "loss": 17.4525, "step": 20016 }, { "epoch": 0.36589467527007513, "grad_norm": 5.772447503822162, "learning_rate": 7.322051904946134e-06, "loss": 17.204, "step": 20017 }, { "epoch": 0.36591295446652167, "grad_norm": 5.598459571819927, "learning_rate": 7.321789747103012e-06, "loss": 17.2012, "step": 20018 }, { "epoch": 0.3659312336629682, "grad_norm": 6.772302513544603, "learning_rate": 7.321527581122218e-06, "loss": 17.5079, "step": 20019 }, { "epoch": 0.3659495128594147, "grad_norm": 8.395492804046095, "learning_rate": 7.3212654070046715e-06, "loss": 18.0994, "step": 20020 }, { "epoch": 0.3659677920558612, "grad_norm": 5.442244433328405, "learning_rate": 7.321003224751292e-06, "loss": 16.9976, "step": 20021 }, { "epoch": 0.36598607125230775, "grad_norm": 4.894323867458015, "learning_rate": 7.320741034362997e-06, "loss": 16.9411, "step": 20022 }, { "epoch": 0.3660043504487543, "grad_norm": 6.376324414241229, "learning_rate": 7.3204788358407065e-06, "loss": 17.4495, "step": 20023 }, { "epoch": 0.36602262964520077, "grad_norm": 7.014361650828988, "learning_rate": 7.320216629185339e-06, "loss": 17.6281, "step": 20024 }, { "epoch": 0.3660409088416473, "grad_norm": 6.3606585805402585, "learning_rate": 7.319954414397813e-06, "loss": 17.4682, "step": 20025 }, { "epoch": 0.36605918803809384, "grad_norm": 6.883518029555075, "learning_rate": 7.3196921914790485e-06, "loss": 17.637, "step": 20026 }, { "epoch": 0.3660774672345404, "grad_norm": 7.022462850259167, "learning_rate": 7.319429960429965e-06, "loss": 17.7141, "step": 20027 }, { "epoch": 0.3660957464309869, "grad_norm": 6.239354392444819, "learning_rate": 7.319167721251479e-06, "loss": 17.3864, "step": 20028 }, { "epoch": 0.3661140256274334, "grad_norm": 4.969791983872103, "learning_rate": 7.318905473944512e-06, "loss": 16.8889, "step": 20029 }, { "epoch": 0.36613230482387993, "grad_norm": 6.228127877753289, "learning_rate": 7.318643218509982e-06, "loss": 17.5305, "step": 20030 }, { "epoch": 0.36615058402032646, "grad_norm": 10.772890395687297, "learning_rate": 7.31838095494881e-06, "loss": 18.2895, "step": 20031 }, { "epoch": 0.366168863216773, "grad_norm": 7.2522352411852555, "learning_rate": 7.318118683261915e-06, "loss": 17.7977, "step": 20032 }, { "epoch": 0.36618714241321954, "grad_norm": 7.486696173260041, "learning_rate": 7.317856403450212e-06, "loss": 18.1596, "step": 20033 }, { "epoch": 0.366205421609666, "grad_norm": 6.07141209553588, "learning_rate": 7.317594115514626e-06, "loss": 17.4972, "step": 20034 }, { "epoch": 0.36622370080611255, "grad_norm": 6.372265499279799, "learning_rate": 7.317331819456073e-06, "loss": 17.5198, "step": 20035 }, { "epoch": 0.3662419800025591, "grad_norm": 8.42351503298585, "learning_rate": 7.317069515275472e-06, "loss": 18.2196, "step": 20036 }, { "epoch": 0.3662602591990056, "grad_norm": 7.328440958729051, "learning_rate": 7.316807202973744e-06, "loss": 18.0784, "step": 20037 }, { "epoch": 0.36627853839545216, "grad_norm": 5.967004691863931, "learning_rate": 7.316544882551808e-06, "loss": 17.1939, "step": 20038 }, { "epoch": 0.36629681759189864, "grad_norm": 6.792899665896095, "learning_rate": 7.3162825540105834e-06, "loss": 17.5523, "step": 20039 }, { "epoch": 0.3663150967883452, "grad_norm": 6.016111889403537, "learning_rate": 7.31602021735099e-06, "loss": 17.2861, "step": 20040 }, { "epoch": 0.3663333759847917, "grad_norm": 5.728117369895846, "learning_rate": 7.315757872573946e-06, "loss": 17.3807, "step": 20041 }, { "epoch": 0.36635165518123824, "grad_norm": 9.82960673985697, "learning_rate": 7.315495519680371e-06, "loss": 19.2907, "step": 20042 }, { "epoch": 0.3663699343776848, "grad_norm": 6.578645153903663, "learning_rate": 7.315233158671186e-06, "loss": 17.7264, "step": 20043 }, { "epoch": 0.36638821357413126, "grad_norm": 7.27288412308479, "learning_rate": 7.3149707895473096e-06, "loss": 17.8259, "step": 20044 }, { "epoch": 0.3664064927705778, "grad_norm": 7.088161976089492, "learning_rate": 7.314708412309661e-06, "loss": 17.7448, "step": 20045 }, { "epoch": 0.36642477196702433, "grad_norm": 9.611423527380586, "learning_rate": 7.314446026959161e-06, "loss": 17.7218, "step": 20046 }, { "epoch": 0.36644305116347087, "grad_norm": 7.10924466864736, "learning_rate": 7.314183633496728e-06, "loss": 17.8319, "step": 20047 }, { "epoch": 0.3664613303599174, "grad_norm": 6.762878693982524, "learning_rate": 7.313921231923282e-06, "loss": 17.8862, "step": 20048 }, { "epoch": 0.3664796095563639, "grad_norm": 7.040128425206224, "learning_rate": 7.313658822239743e-06, "loss": 17.5381, "step": 20049 }, { "epoch": 0.3664978887528104, "grad_norm": 6.2413025502073864, "learning_rate": 7.31339640444703e-06, "loss": 17.2786, "step": 20050 }, { "epoch": 0.36651616794925695, "grad_norm": 5.821097701028158, "learning_rate": 7.313133978546065e-06, "loss": 17.2855, "step": 20051 }, { "epoch": 0.3665344471457035, "grad_norm": 6.420080152777128, "learning_rate": 7.312871544537765e-06, "loss": 17.5517, "step": 20052 }, { "epoch": 0.36655272634215, "grad_norm": 6.687833941744186, "learning_rate": 7.312609102423052e-06, "loss": 17.0203, "step": 20053 }, { "epoch": 0.3665710055385965, "grad_norm": 7.4913413588200575, "learning_rate": 7.312346652202843e-06, "loss": 18.13, "step": 20054 }, { "epoch": 0.36658928473504304, "grad_norm": 6.378464109102114, "learning_rate": 7.312084193878061e-06, "loss": 17.5577, "step": 20055 }, { "epoch": 0.3666075639314896, "grad_norm": 6.46936764170632, "learning_rate": 7.311821727449624e-06, "loss": 17.4391, "step": 20056 }, { "epoch": 0.3666258431279361, "grad_norm": 6.938693382391579, "learning_rate": 7.311559252918452e-06, "loss": 17.7051, "step": 20057 }, { "epoch": 0.3666441223243826, "grad_norm": 6.756617300125149, "learning_rate": 7.311296770285467e-06, "loss": 17.748, "step": 20058 }, { "epoch": 0.36666240152082913, "grad_norm": 7.063743985939497, "learning_rate": 7.3110342795515865e-06, "loss": 17.5138, "step": 20059 }, { "epoch": 0.36668068071727566, "grad_norm": 6.830333458443167, "learning_rate": 7.310771780717729e-06, "loss": 17.8553, "step": 20060 }, { "epoch": 0.3666989599137222, "grad_norm": 7.060236435351416, "learning_rate": 7.310509273784821e-06, "loss": 17.798, "step": 20061 }, { "epoch": 0.36671723911016874, "grad_norm": 6.933666121958821, "learning_rate": 7.310246758753776e-06, "loss": 17.826, "step": 20062 }, { "epoch": 0.3667355183066152, "grad_norm": 8.6564575951896, "learning_rate": 7.309984235625518e-06, "loss": 18.0475, "step": 20063 }, { "epoch": 0.36675379750306175, "grad_norm": 6.021194253401691, "learning_rate": 7.309721704400965e-06, "loss": 17.2236, "step": 20064 }, { "epoch": 0.3667720766995083, "grad_norm": 8.03573630844235, "learning_rate": 7.309459165081037e-06, "loss": 18.3012, "step": 20065 }, { "epoch": 0.3667903558959548, "grad_norm": 6.409128167831567, "learning_rate": 7.309196617666657e-06, "loss": 17.5102, "step": 20066 }, { "epoch": 0.36680863509240136, "grad_norm": 5.691077165759169, "learning_rate": 7.308934062158741e-06, "loss": 17.4877, "step": 20067 }, { "epoch": 0.36682691428884784, "grad_norm": 7.946724345294653, "learning_rate": 7.308671498558213e-06, "loss": 18.0304, "step": 20068 }, { "epoch": 0.3668451934852944, "grad_norm": 6.276899429007991, "learning_rate": 7.308408926865991e-06, "loss": 17.5556, "step": 20069 }, { "epoch": 0.3668634726817409, "grad_norm": 8.054993608283159, "learning_rate": 7.308146347082996e-06, "loss": 18.115, "step": 20070 }, { "epoch": 0.36688175187818745, "grad_norm": 6.73741360233731, "learning_rate": 7.307883759210148e-06, "loss": 17.7996, "step": 20071 }, { "epoch": 0.366900031074634, "grad_norm": 6.637461375419106, "learning_rate": 7.3076211632483695e-06, "loss": 17.5056, "step": 20072 }, { "epoch": 0.36691831027108046, "grad_norm": 6.408352045887416, "learning_rate": 7.307358559198578e-06, "loss": 17.5988, "step": 20073 }, { "epoch": 0.366936589467527, "grad_norm": 7.497068300084706, "learning_rate": 7.307095947061694e-06, "loss": 17.4475, "step": 20074 }, { "epoch": 0.36695486866397353, "grad_norm": 7.083633864362394, "learning_rate": 7.306833326838641e-06, "loss": 17.7188, "step": 20075 }, { "epoch": 0.36697314786042007, "grad_norm": 5.704502153833018, "learning_rate": 7.306570698530336e-06, "loss": 17.2738, "step": 20076 }, { "epoch": 0.3669914270568666, "grad_norm": 6.596272202193835, "learning_rate": 7.306308062137702e-06, "loss": 17.5896, "step": 20077 }, { "epoch": 0.3670097062533131, "grad_norm": 5.400517468672541, "learning_rate": 7.3060454176616555e-06, "loss": 17.1224, "step": 20078 }, { "epoch": 0.3670279854497596, "grad_norm": 7.2614347856671815, "learning_rate": 7.3057827651031225e-06, "loss": 17.918, "step": 20079 }, { "epoch": 0.36704626464620616, "grad_norm": 5.905622146665631, "learning_rate": 7.305520104463019e-06, "loss": 17.1705, "step": 20080 }, { "epoch": 0.3670645438426527, "grad_norm": 5.873681135369336, "learning_rate": 7.30525743574227e-06, "loss": 17.1603, "step": 20081 }, { "epoch": 0.3670828230390992, "grad_norm": 5.945053141843443, "learning_rate": 7.304994758941792e-06, "loss": 17.3761, "step": 20082 }, { "epoch": 0.3671011022355457, "grad_norm": 8.528893648065019, "learning_rate": 7.304732074062508e-06, "loss": 18.5653, "step": 20083 }, { "epoch": 0.36711938143199224, "grad_norm": 6.767456007725406, "learning_rate": 7.3044693811053395e-06, "loss": 17.8274, "step": 20084 }, { "epoch": 0.3671376606284388, "grad_norm": 7.488586284702607, "learning_rate": 7.304206680071204e-06, "loss": 17.8009, "step": 20085 }, { "epoch": 0.3671559398248853, "grad_norm": 6.5844851393308925, "learning_rate": 7.303943970961025e-06, "loss": 17.4793, "step": 20086 }, { "epoch": 0.36717421902133185, "grad_norm": 5.177449408518992, "learning_rate": 7.303681253775721e-06, "loss": 17.1093, "step": 20087 }, { "epoch": 0.36719249821777833, "grad_norm": 7.252141770205381, "learning_rate": 7.303418528516214e-06, "loss": 17.6935, "step": 20088 }, { "epoch": 0.36721077741422486, "grad_norm": 5.407830787162609, "learning_rate": 7.303155795183427e-06, "loss": 17.032, "step": 20089 }, { "epoch": 0.3672290566106714, "grad_norm": 5.407756889535193, "learning_rate": 7.302893053778277e-06, "loss": 17.2391, "step": 20090 }, { "epoch": 0.36724733580711794, "grad_norm": 5.398190716370125, "learning_rate": 7.302630304301688e-06, "loss": 17.0918, "step": 20091 }, { "epoch": 0.3672656150035644, "grad_norm": 6.664543300439525, "learning_rate": 7.302367546754577e-06, "loss": 17.6589, "step": 20092 }, { "epoch": 0.36728389420001095, "grad_norm": 7.744316272679232, "learning_rate": 7.302104781137871e-06, "loss": 18.0601, "step": 20093 }, { "epoch": 0.3673021733964575, "grad_norm": 7.400419416615188, "learning_rate": 7.3018420074524856e-06, "loss": 17.8153, "step": 20094 }, { "epoch": 0.367320452592904, "grad_norm": 7.730658448004205, "learning_rate": 7.301579225699344e-06, "loss": 17.9133, "step": 20095 }, { "epoch": 0.36733873178935056, "grad_norm": 7.870582895323564, "learning_rate": 7.301316435879366e-06, "loss": 18.2224, "step": 20096 }, { "epoch": 0.36735701098579704, "grad_norm": 5.7632077535216215, "learning_rate": 7.301053637993476e-06, "loss": 17.2209, "step": 20097 }, { "epoch": 0.3673752901822436, "grad_norm": 6.723134070566391, "learning_rate": 7.30079083204259e-06, "loss": 17.5405, "step": 20098 }, { "epoch": 0.3673935693786901, "grad_norm": 8.821324588463753, "learning_rate": 7.300528018027634e-06, "loss": 18.5008, "step": 20099 }, { "epoch": 0.36741184857513665, "grad_norm": 6.1028228266027895, "learning_rate": 7.300265195949526e-06, "loss": 17.4973, "step": 20100 }, { "epoch": 0.3674301277715832, "grad_norm": 7.136781649492445, "learning_rate": 7.300002365809187e-06, "loss": 17.7945, "step": 20101 }, { "epoch": 0.36744840696802966, "grad_norm": 6.748100371495393, "learning_rate": 7.299739527607541e-06, "loss": 17.5044, "step": 20102 }, { "epoch": 0.3674666861644762, "grad_norm": 7.662613441268769, "learning_rate": 7.299476681345508e-06, "loss": 18.1845, "step": 20103 }, { "epoch": 0.36748496536092273, "grad_norm": 6.700378445390445, "learning_rate": 7.299213827024007e-06, "loss": 17.564, "step": 20104 }, { "epoch": 0.36750324455736927, "grad_norm": 6.7273354272376, "learning_rate": 7.298950964643961e-06, "loss": 17.5001, "step": 20105 }, { "epoch": 0.3675215237538158, "grad_norm": 8.701715193046008, "learning_rate": 7.298688094206292e-06, "loss": 18.1433, "step": 20106 }, { "epoch": 0.3675398029502623, "grad_norm": 7.906115467473902, "learning_rate": 7.298425215711922e-06, "loss": 17.9331, "step": 20107 }, { "epoch": 0.3675580821467088, "grad_norm": 5.709904934285042, "learning_rate": 7.298162329161769e-06, "loss": 17.3205, "step": 20108 }, { "epoch": 0.36757636134315536, "grad_norm": 6.515815760470419, "learning_rate": 7.297899434556757e-06, "loss": 17.5524, "step": 20109 }, { "epoch": 0.3675946405396019, "grad_norm": 5.267983022408444, "learning_rate": 7.297636531897807e-06, "loss": 16.9202, "step": 20110 }, { "epoch": 0.3676129197360484, "grad_norm": 6.826460841783871, "learning_rate": 7.2973736211858404e-06, "loss": 17.4843, "step": 20111 }, { "epoch": 0.3676311989324949, "grad_norm": 5.052098701405337, "learning_rate": 7.297110702421779e-06, "loss": 16.9792, "step": 20112 }, { "epoch": 0.36764947812894144, "grad_norm": 6.565128374893936, "learning_rate": 7.2968477756065446e-06, "loss": 17.3362, "step": 20113 }, { "epoch": 0.367667757325388, "grad_norm": 6.128296352871817, "learning_rate": 7.296584840741055e-06, "loss": 17.4375, "step": 20114 }, { "epoch": 0.3676860365218345, "grad_norm": 6.125640252298581, "learning_rate": 7.2963218978262375e-06, "loss": 17.266, "step": 20115 }, { "epoch": 0.36770431571828105, "grad_norm": 6.902902395075871, "learning_rate": 7.296058946863011e-06, "loss": 17.6244, "step": 20116 }, { "epoch": 0.36772259491472753, "grad_norm": 7.64173330540598, "learning_rate": 7.295795987852297e-06, "loss": 18.2167, "step": 20117 }, { "epoch": 0.36774087411117407, "grad_norm": 6.884629072605797, "learning_rate": 7.295533020795017e-06, "loss": 17.5537, "step": 20118 }, { "epoch": 0.3677591533076206, "grad_norm": 6.1274511937644185, "learning_rate": 7.295270045692091e-06, "loss": 17.3439, "step": 20119 }, { "epoch": 0.36777743250406714, "grad_norm": 6.622061460834097, "learning_rate": 7.295007062544446e-06, "loss": 17.4974, "step": 20120 }, { "epoch": 0.36779571170051367, "grad_norm": 6.366456852787825, "learning_rate": 7.294744071352999e-06, "loss": 17.3657, "step": 20121 }, { "epoch": 0.36781399089696015, "grad_norm": 6.999359129834391, "learning_rate": 7.294481072118673e-06, "loss": 17.7983, "step": 20122 }, { "epoch": 0.3678322700934067, "grad_norm": 6.185835948923508, "learning_rate": 7.2942180648423885e-06, "loss": 17.4683, "step": 20123 }, { "epoch": 0.3678505492898532, "grad_norm": 6.919377538042241, "learning_rate": 7.293955049525071e-06, "loss": 17.8182, "step": 20124 }, { "epoch": 0.36786882848629976, "grad_norm": 8.598542348260848, "learning_rate": 7.2936920261676395e-06, "loss": 18.0788, "step": 20125 }, { "epoch": 0.36788710768274624, "grad_norm": 6.762175903862375, "learning_rate": 7.293428994771017e-06, "loss": 17.6727, "step": 20126 }, { "epoch": 0.3679053868791928, "grad_norm": 6.078292989964342, "learning_rate": 7.293165955336125e-06, "loss": 17.3059, "step": 20127 }, { "epoch": 0.3679236660756393, "grad_norm": 6.885064048666509, "learning_rate": 7.292902907863883e-06, "loss": 17.7859, "step": 20128 }, { "epoch": 0.36794194527208585, "grad_norm": 6.8245340079871175, "learning_rate": 7.292639852355216e-06, "loss": 17.6658, "step": 20129 }, { "epoch": 0.3679602244685324, "grad_norm": 5.562108315960352, "learning_rate": 7.292376788811047e-06, "loss": 17.0449, "step": 20130 }, { "epoch": 0.36797850366497886, "grad_norm": 7.151082116488049, "learning_rate": 7.292113717232297e-06, "loss": 17.8598, "step": 20131 }, { "epoch": 0.3679967828614254, "grad_norm": 6.569337289212714, "learning_rate": 7.291850637619884e-06, "loss": 17.7569, "step": 20132 }, { "epoch": 0.36801506205787193, "grad_norm": 6.592275684741269, "learning_rate": 7.291587549974734e-06, "loss": 17.6656, "step": 20133 }, { "epoch": 0.36803334125431847, "grad_norm": 7.178208409049657, "learning_rate": 7.291324454297771e-06, "loss": 17.6371, "step": 20134 }, { "epoch": 0.368051620450765, "grad_norm": 7.485265496700924, "learning_rate": 7.291061350589913e-06, "loss": 18.1561, "step": 20135 }, { "epoch": 0.3680698996472115, "grad_norm": 7.948896447649479, "learning_rate": 7.290798238852084e-06, "loss": 17.9248, "step": 20136 }, { "epoch": 0.368088178843658, "grad_norm": 7.168305311563071, "learning_rate": 7.2905351190852055e-06, "loss": 17.8386, "step": 20137 }, { "epoch": 0.36810645804010456, "grad_norm": 5.664485484315749, "learning_rate": 7.290271991290201e-06, "loss": 17.13, "step": 20138 }, { "epoch": 0.3681247372365511, "grad_norm": 6.137259040566241, "learning_rate": 7.290008855467992e-06, "loss": 17.4592, "step": 20139 }, { "epoch": 0.3681430164329976, "grad_norm": 7.451550629648147, "learning_rate": 7.289745711619499e-06, "loss": 18.3272, "step": 20140 }, { "epoch": 0.3681612956294441, "grad_norm": 5.842546717482064, "learning_rate": 7.2894825597456475e-06, "loss": 17.2294, "step": 20141 }, { "epoch": 0.36817957482589064, "grad_norm": 7.217392933405763, "learning_rate": 7.289219399847358e-06, "loss": 18.2428, "step": 20142 }, { "epoch": 0.3681978540223372, "grad_norm": 7.084181205845155, "learning_rate": 7.288956231925552e-06, "loss": 17.5697, "step": 20143 }, { "epoch": 0.3682161332187837, "grad_norm": 5.796022375836719, "learning_rate": 7.288693055981156e-06, "loss": 17.2733, "step": 20144 }, { "epoch": 0.36823441241523025, "grad_norm": 6.625334101804693, "learning_rate": 7.288429872015087e-06, "loss": 17.5939, "step": 20145 }, { "epoch": 0.36825269161167673, "grad_norm": 6.874146281867213, "learning_rate": 7.28816668002827e-06, "loss": 17.5714, "step": 20146 }, { "epoch": 0.36827097080812327, "grad_norm": 6.428597716907134, "learning_rate": 7.287903480021627e-06, "loss": 17.5581, "step": 20147 }, { "epoch": 0.3682892500045698, "grad_norm": 5.285284881012578, "learning_rate": 7.287640271996082e-06, "loss": 16.989, "step": 20148 }, { "epoch": 0.36830752920101634, "grad_norm": 5.255396001001805, "learning_rate": 7.287377055952557e-06, "loss": 16.8976, "step": 20149 }, { "epoch": 0.3683258083974629, "grad_norm": 6.133955867592134, "learning_rate": 7.287113831891972e-06, "loss": 17.2857, "step": 20150 }, { "epoch": 0.36834408759390935, "grad_norm": 5.460204999978745, "learning_rate": 7.286850599815253e-06, "loss": 17.1928, "step": 20151 }, { "epoch": 0.3683623667903559, "grad_norm": 9.398947934144394, "learning_rate": 7.286587359723321e-06, "loss": 17.856, "step": 20152 }, { "epoch": 0.3683806459868024, "grad_norm": 6.430024112866958, "learning_rate": 7.286324111617098e-06, "loss": 17.398, "step": 20153 }, { "epoch": 0.36839892518324896, "grad_norm": 6.933947022277681, "learning_rate": 7.286060855497508e-06, "loss": 17.4871, "step": 20154 }, { "epoch": 0.3684172043796955, "grad_norm": 6.49910817450385, "learning_rate": 7.285797591365471e-06, "loss": 17.3752, "step": 20155 }, { "epoch": 0.368435483576142, "grad_norm": 6.501522383444161, "learning_rate": 7.285534319221914e-06, "loss": 17.5474, "step": 20156 }, { "epoch": 0.3684537627725885, "grad_norm": 6.8706472101460205, "learning_rate": 7.285271039067758e-06, "loss": 17.6611, "step": 20157 }, { "epoch": 0.36847204196903505, "grad_norm": 11.028480464771858, "learning_rate": 7.285007750903924e-06, "loss": 18.1285, "step": 20158 }, { "epoch": 0.3684903211654816, "grad_norm": 6.970470342012089, "learning_rate": 7.284744454731336e-06, "loss": 17.9104, "step": 20159 }, { "epoch": 0.36850860036192806, "grad_norm": 6.760825199052605, "learning_rate": 7.284481150550917e-06, "loss": 17.8917, "step": 20160 }, { "epoch": 0.3685268795583746, "grad_norm": 6.481066904222357, "learning_rate": 7.28421783836359e-06, "loss": 17.732, "step": 20161 }, { "epoch": 0.36854515875482113, "grad_norm": 5.683108799358094, "learning_rate": 7.283954518170279e-06, "loss": 17.3968, "step": 20162 }, { "epoch": 0.36856343795126767, "grad_norm": 5.7027389137600535, "learning_rate": 7.283691189971905e-06, "loss": 17.5697, "step": 20163 }, { "epoch": 0.3685817171477142, "grad_norm": 7.586150772960724, "learning_rate": 7.28342785376939e-06, "loss": 17.9641, "step": 20164 }, { "epoch": 0.3685999963441607, "grad_norm": 7.0170276217612555, "learning_rate": 7.28316450956366e-06, "loss": 17.6283, "step": 20165 }, { "epoch": 0.3686182755406072, "grad_norm": 8.547247398630253, "learning_rate": 7.282901157355635e-06, "loss": 17.8078, "step": 20166 }, { "epoch": 0.36863655473705376, "grad_norm": 6.7076971943592865, "learning_rate": 7.282637797146241e-06, "loss": 17.8502, "step": 20167 }, { "epoch": 0.3686548339335003, "grad_norm": 7.7097343986682505, "learning_rate": 7.2823744289364e-06, "loss": 17.678, "step": 20168 }, { "epoch": 0.36867311312994683, "grad_norm": 6.749417805144167, "learning_rate": 7.282111052727033e-06, "loss": 17.5471, "step": 20169 }, { "epoch": 0.3686913923263933, "grad_norm": 5.925547427443137, "learning_rate": 7.281847668519066e-06, "loss": 17.0655, "step": 20170 }, { "epoch": 0.36870967152283984, "grad_norm": 5.014707464621083, "learning_rate": 7.28158427631342e-06, "loss": 16.8923, "step": 20171 }, { "epoch": 0.3687279507192864, "grad_norm": 8.34346511944614, "learning_rate": 7.281320876111021e-06, "loss": 18.1639, "step": 20172 }, { "epoch": 0.3687462299157329, "grad_norm": 6.665248434135376, "learning_rate": 7.2810574679127886e-06, "loss": 17.5314, "step": 20173 }, { "epoch": 0.36876450911217945, "grad_norm": 6.58588464299712, "learning_rate": 7.280794051719647e-06, "loss": 17.7183, "step": 20174 }, { "epoch": 0.36878278830862593, "grad_norm": 6.760345052735897, "learning_rate": 7.280530627532521e-06, "loss": 17.6791, "step": 20175 }, { "epoch": 0.36880106750507247, "grad_norm": 6.586517927225698, "learning_rate": 7.280267195352335e-06, "loss": 17.5181, "step": 20176 }, { "epoch": 0.368819346701519, "grad_norm": 4.754023736789508, "learning_rate": 7.280003755180009e-06, "loss": 16.9561, "step": 20177 }, { "epoch": 0.36883762589796554, "grad_norm": 8.604932407798067, "learning_rate": 7.279740307016468e-06, "loss": 18.5146, "step": 20178 }, { "epoch": 0.3688559050944121, "grad_norm": 5.268657647543221, "learning_rate": 7.279476850862634e-06, "loss": 16.9788, "step": 20179 }, { "epoch": 0.36887418429085855, "grad_norm": 7.057785945547185, "learning_rate": 7.2792133867194314e-06, "loss": 17.814, "step": 20180 }, { "epoch": 0.3688924634873051, "grad_norm": 7.236374121449109, "learning_rate": 7.278949914587784e-06, "loss": 17.9099, "step": 20181 }, { "epoch": 0.3689107426837516, "grad_norm": 6.654765517746662, "learning_rate": 7.278686434468615e-06, "loss": 17.8197, "step": 20182 }, { "epoch": 0.36892902188019816, "grad_norm": 6.710378083383677, "learning_rate": 7.278422946362847e-06, "loss": 17.7071, "step": 20183 }, { "epoch": 0.3689473010766447, "grad_norm": 7.2149529704784925, "learning_rate": 7.2781594502714056e-06, "loss": 17.7761, "step": 20184 }, { "epoch": 0.3689655802730912, "grad_norm": 6.455532537998574, "learning_rate": 7.277895946195213e-06, "loss": 17.4167, "step": 20185 }, { "epoch": 0.3689838594695377, "grad_norm": 5.737671359493939, "learning_rate": 7.2776324341351925e-06, "loss": 17.2511, "step": 20186 }, { "epoch": 0.36900213866598425, "grad_norm": 7.3158062405044495, "learning_rate": 7.277368914092266e-06, "loss": 17.8465, "step": 20187 }, { "epoch": 0.3690204178624308, "grad_norm": 6.451035583476881, "learning_rate": 7.277105386067361e-06, "loss": 17.4541, "step": 20188 }, { "epoch": 0.3690386970588773, "grad_norm": 6.621442986884995, "learning_rate": 7.2768418500614e-06, "loss": 17.8096, "step": 20189 }, { "epoch": 0.3690569762553238, "grad_norm": 6.519777440519619, "learning_rate": 7.276578306075306e-06, "loss": 17.4998, "step": 20190 }, { "epoch": 0.36907525545177033, "grad_norm": 6.84816196470274, "learning_rate": 7.276314754110001e-06, "loss": 17.5994, "step": 20191 }, { "epoch": 0.36909353464821687, "grad_norm": 6.731177467542887, "learning_rate": 7.276051194166409e-06, "loss": 17.458, "step": 20192 }, { "epoch": 0.3691118138446634, "grad_norm": 9.501986421561949, "learning_rate": 7.275787626245459e-06, "loss": 18.2521, "step": 20193 }, { "epoch": 0.3691300930411099, "grad_norm": 6.501485311118727, "learning_rate": 7.2755240503480685e-06, "loss": 17.5143, "step": 20194 }, { "epoch": 0.3691483722375564, "grad_norm": 8.309874503389043, "learning_rate": 7.2752604664751634e-06, "loss": 18.1989, "step": 20195 }, { "epoch": 0.36916665143400296, "grad_norm": 7.4206115730466164, "learning_rate": 7.274996874627669e-06, "loss": 17.9945, "step": 20196 }, { "epoch": 0.3691849306304495, "grad_norm": 5.68336568933742, "learning_rate": 7.274733274806507e-06, "loss": 17.2302, "step": 20197 }, { "epoch": 0.36920320982689603, "grad_norm": 5.460141761208392, "learning_rate": 7.274469667012603e-06, "loss": 17.1825, "step": 20198 }, { "epoch": 0.3692214890233425, "grad_norm": 6.54903313345841, "learning_rate": 7.274206051246879e-06, "loss": 17.2857, "step": 20199 }, { "epoch": 0.36923976821978904, "grad_norm": 5.60409017180462, "learning_rate": 7.273942427510262e-06, "loss": 17.296, "step": 20200 }, { "epoch": 0.3692580474162356, "grad_norm": 5.352966850503021, "learning_rate": 7.273678795803671e-06, "loss": 16.9111, "step": 20201 }, { "epoch": 0.3692763266126821, "grad_norm": 7.657158870126983, "learning_rate": 7.273415156128037e-06, "loss": 18.1799, "step": 20202 }, { "epoch": 0.36929460580912865, "grad_norm": 6.281812384549631, "learning_rate": 7.273151508484278e-06, "loss": 17.1461, "step": 20203 }, { "epoch": 0.36931288500557513, "grad_norm": 5.657290687537265, "learning_rate": 7.2728878528733205e-06, "loss": 17.268, "step": 20204 }, { "epoch": 0.36933116420202167, "grad_norm": 7.615048088581312, "learning_rate": 7.272624189296088e-06, "loss": 17.7176, "step": 20205 }, { "epoch": 0.3693494433984682, "grad_norm": 8.85994191056542, "learning_rate": 7.272360517753505e-06, "loss": 18.8301, "step": 20206 }, { "epoch": 0.36936772259491474, "grad_norm": 6.823672506279992, "learning_rate": 7.272096838246496e-06, "loss": 17.9613, "step": 20207 }, { "epoch": 0.3693860017913613, "grad_norm": 6.95551203175244, "learning_rate": 7.271833150775984e-06, "loss": 17.7593, "step": 20208 }, { "epoch": 0.36940428098780775, "grad_norm": 5.5484462019423795, "learning_rate": 7.271569455342895e-06, "loss": 17.0792, "step": 20209 }, { "epoch": 0.3694225601842543, "grad_norm": 7.258949293892397, "learning_rate": 7.271305751948152e-06, "loss": 17.8223, "step": 20210 }, { "epoch": 0.3694408393807008, "grad_norm": 7.2421793358182995, "learning_rate": 7.2710420405926795e-06, "loss": 18.0179, "step": 20211 }, { "epoch": 0.36945911857714736, "grad_norm": 5.539469075281623, "learning_rate": 7.270778321277401e-06, "loss": 17.1364, "step": 20212 }, { "epoch": 0.3694773977735939, "grad_norm": 8.741657716263415, "learning_rate": 7.270514594003243e-06, "loss": 18.4418, "step": 20213 }, { "epoch": 0.3694956769700404, "grad_norm": 6.51638394524961, "learning_rate": 7.270250858771126e-06, "loss": 17.5268, "step": 20214 }, { "epoch": 0.3695139561664869, "grad_norm": 5.658198226211729, "learning_rate": 7.2699871155819775e-06, "loss": 17.0006, "step": 20215 }, { "epoch": 0.36953223536293345, "grad_norm": 5.933392965481522, "learning_rate": 7.269723364436721e-06, "loss": 17.113, "step": 20216 }, { "epoch": 0.36955051455938, "grad_norm": 7.0326731791723684, "learning_rate": 7.2694596053362834e-06, "loss": 17.9364, "step": 20217 }, { "epoch": 0.3695687937558265, "grad_norm": 7.2282587953716515, "learning_rate": 7.269195838281585e-06, "loss": 17.5891, "step": 20218 }, { "epoch": 0.369587072952273, "grad_norm": 6.463010505382674, "learning_rate": 7.268932063273552e-06, "loss": 17.4782, "step": 20219 }, { "epoch": 0.36960535214871953, "grad_norm": 7.062369766509262, "learning_rate": 7.26866828031311e-06, "loss": 17.5808, "step": 20220 }, { "epoch": 0.36962363134516607, "grad_norm": 7.207672875590722, "learning_rate": 7.2684044894011805e-06, "loss": 17.8386, "step": 20221 }, { "epoch": 0.3696419105416126, "grad_norm": 4.822325848945059, "learning_rate": 7.268140690538692e-06, "loss": 16.8335, "step": 20222 }, { "epoch": 0.36966018973805914, "grad_norm": 5.3225809086744755, "learning_rate": 7.267876883726567e-06, "loss": 17.1389, "step": 20223 }, { "epoch": 0.3696784689345056, "grad_norm": 5.3704522860734105, "learning_rate": 7.267613068965729e-06, "loss": 17.0194, "step": 20224 }, { "epoch": 0.36969674813095216, "grad_norm": 6.869480950513082, "learning_rate": 7.267349246257105e-06, "loss": 18.2319, "step": 20225 }, { "epoch": 0.3697150273273987, "grad_norm": 6.397992614038071, "learning_rate": 7.267085415601618e-06, "loss": 17.5737, "step": 20226 }, { "epoch": 0.36973330652384523, "grad_norm": 6.558547566995884, "learning_rate": 7.266821577000195e-06, "loss": 17.7717, "step": 20227 }, { "epoch": 0.3697515857202917, "grad_norm": 6.4273194172281585, "learning_rate": 7.266557730453757e-06, "loss": 17.454, "step": 20228 }, { "epoch": 0.36976986491673824, "grad_norm": 7.414612660435543, "learning_rate": 7.266293875963232e-06, "loss": 17.8947, "step": 20229 }, { "epoch": 0.3697881441131848, "grad_norm": 6.833536317395566, "learning_rate": 7.266030013529544e-06, "loss": 17.7028, "step": 20230 }, { "epoch": 0.3698064233096313, "grad_norm": 6.376908683057129, "learning_rate": 7.265766143153617e-06, "loss": 17.7087, "step": 20231 }, { "epoch": 0.36982470250607785, "grad_norm": 6.2602699004023385, "learning_rate": 7.265502264836376e-06, "loss": 17.3338, "step": 20232 }, { "epoch": 0.36984298170252433, "grad_norm": 5.588023044502968, "learning_rate": 7.265238378578745e-06, "loss": 17.2379, "step": 20233 }, { "epoch": 0.36986126089897087, "grad_norm": 4.989465197818141, "learning_rate": 7.264974484381653e-06, "loss": 16.9015, "step": 20234 }, { "epoch": 0.3698795400954174, "grad_norm": 6.612200306580514, "learning_rate": 7.26471058224602e-06, "loss": 17.569, "step": 20235 }, { "epoch": 0.36989781929186394, "grad_norm": 7.340673453322384, "learning_rate": 7.264446672172772e-06, "loss": 17.7228, "step": 20236 }, { "epoch": 0.3699160984883105, "grad_norm": 5.468142572189995, "learning_rate": 7.264182754162836e-06, "loss": 17.0734, "step": 20237 }, { "epoch": 0.36993437768475695, "grad_norm": 7.07678047529603, "learning_rate": 7.263918828217137e-06, "loss": 17.6607, "step": 20238 }, { "epoch": 0.3699526568812035, "grad_norm": 7.675859352430443, "learning_rate": 7.263654894336598e-06, "loss": 18.2184, "step": 20239 }, { "epoch": 0.36997093607765, "grad_norm": 5.954324255706961, "learning_rate": 7.263390952522145e-06, "loss": 17.0799, "step": 20240 }, { "epoch": 0.36998921527409656, "grad_norm": 6.029026241074232, "learning_rate": 7.263127002774703e-06, "loss": 17.3228, "step": 20241 }, { "epoch": 0.3700074944705431, "grad_norm": 5.535211916569911, "learning_rate": 7.262863045095197e-06, "loss": 17.2121, "step": 20242 }, { "epoch": 0.3700257736669896, "grad_norm": 7.536357040008187, "learning_rate": 7.262599079484554e-06, "loss": 18.1031, "step": 20243 }, { "epoch": 0.3700440528634361, "grad_norm": 6.40201864864051, "learning_rate": 7.262335105943696e-06, "loss": 17.8633, "step": 20244 }, { "epoch": 0.37006233205988265, "grad_norm": 7.2861604702502065, "learning_rate": 7.262071124473551e-06, "loss": 17.5257, "step": 20245 }, { "epoch": 0.3700806112563292, "grad_norm": 6.315008338704994, "learning_rate": 7.261807135075041e-06, "loss": 17.5906, "step": 20246 }, { "epoch": 0.3700988904527757, "grad_norm": 8.925224656046367, "learning_rate": 7.261543137749094e-06, "loss": 19.0537, "step": 20247 }, { "epoch": 0.3701171696492222, "grad_norm": 5.802604624840978, "learning_rate": 7.261279132496636e-06, "loss": 17.1504, "step": 20248 }, { "epoch": 0.37013544884566874, "grad_norm": 8.186105514082048, "learning_rate": 7.261015119318589e-06, "loss": 18.017, "step": 20249 }, { "epoch": 0.37015372804211527, "grad_norm": 6.326322714453996, "learning_rate": 7.260751098215881e-06, "loss": 17.5734, "step": 20250 }, { "epoch": 0.3701720072385618, "grad_norm": 7.162270440843001, "learning_rate": 7.260487069189437e-06, "loss": 17.9879, "step": 20251 }, { "epoch": 0.37019028643500834, "grad_norm": 6.534782206402165, "learning_rate": 7.260223032240181e-06, "loss": 17.5544, "step": 20252 }, { "epoch": 0.3702085656314548, "grad_norm": 6.059176468142338, "learning_rate": 7.25995898736904e-06, "loss": 17.3778, "step": 20253 }, { "epoch": 0.37022684482790136, "grad_norm": 5.622668196541799, "learning_rate": 7.259694934576939e-06, "loss": 17.4539, "step": 20254 }, { "epoch": 0.3702451240243479, "grad_norm": 7.751454172468496, "learning_rate": 7.259430873864804e-06, "loss": 17.8333, "step": 20255 }, { "epoch": 0.37026340322079443, "grad_norm": 6.797918019734457, "learning_rate": 7.259166805233559e-06, "loss": 17.8342, "step": 20256 }, { "epoch": 0.37028168241724096, "grad_norm": 7.335166529034762, "learning_rate": 7.25890272868413e-06, "loss": 17.7724, "step": 20257 }, { "epoch": 0.37029996161368745, "grad_norm": 5.964816393323727, "learning_rate": 7.258638644217444e-06, "loss": 17.3451, "step": 20258 }, { "epoch": 0.370318240810134, "grad_norm": 9.362367560733293, "learning_rate": 7.258374551834425e-06, "loss": 17.8653, "step": 20259 }, { "epoch": 0.3703365200065805, "grad_norm": 6.005702624129726, "learning_rate": 7.258110451535998e-06, "loss": 17.2804, "step": 20260 }, { "epoch": 0.37035479920302705, "grad_norm": 6.918011032469916, "learning_rate": 7.257846343323091e-06, "loss": 17.6388, "step": 20261 }, { "epoch": 0.37037307839947353, "grad_norm": 6.683525037046206, "learning_rate": 7.257582227196629e-06, "loss": 17.9775, "step": 20262 }, { "epoch": 0.37039135759592007, "grad_norm": 6.844561692522498, "learning_rate": 7.257318103157537e-06, "loss": 17.5909, "step": 20263 }, { "epoch": 0.3704096367923666, "grad_norm": 6.407991545432797, "learning_rate": 7.25705397120674e-06, "loss": 17.442, "step": 20264 }, { "epoch": 0.37042791598881314, "grad_norm": 7.4644311069466385, "learning_rate": 7.256789831345166e-06, "loss": 17.7972, "step": 20265 }, { "epoch": 0.3704461951852597, "grad_norm": 6.029558736818221, "learning_rate": 7.256525683573739e-06, "loss": 17.319, "step": 20266 }, { "epoch": 0.37046447438170615, "grad_norm": 6.868138842885967, "learning_rate": 7.2562615278933845e-06, "loss": 17.7122, "step": 20267 }, { "epoch": 0.3704827535781527, "grad_norm": 6.01779397169704, "learning_rate": 7.255997364305028e-06, "loss": 17.2451, "step": 20268 }, { "epoch": 0.3705010327745992, "grad_norm": 6.623586650363315, "learning_rate": 7.255733192809598e-06, "loss": 17.6405, "step": 20269 }, { "epoch": 0.37051931197104576, "grad_norm": 5.949821133058551, "learning_rate": 7.2554690134080195e-06, "loss": 17.2411, "step": 20270 }, { "epoch": 0.3705375911674923, "grad_norm": 5.943184825420087, "learning_rate": 7.255204826101218e-06, "loss": 17.2515, "step": 20271 }, { "epoch": 0.3705558703639388, "grad_norm": 7.694644087135006, "learning_rate": 7.254940630890119e-06, "loss": 17.9962, "step": 20272 }, { "epoch": 0.3705741495603853, "grad_norm": 7.380378328384874, "learning_rate": 7.254676427775648e-06, "loss": 17.4382, "step": 20273 }, { "epoch": 0.37059242875683185, "grad_norm": 7.042658148811812, "learning_rate": 7.254412216758731e-06, "loss": 17.4113, "step": 20274 }, { "epoch": 0.3706107079532784, "grad_norm": 7.525244056436113, "learning_rate": 7.254147997840297e-06, "loss": 18.1133, "step": 20275 }, { "epoch": 0.3706289871497249, "grad_norm": 7.00185530585026, "learning_rate": 7.25388377102127e-06, "loss": 17.8023, "step": 20276 }, { "epoch": 0.3706472663461714, "grad_norm": 6.562099782250633, "learning_rate": 7.253619536302574e-06, "loss": 17.2893, "step": 20277 }, { "epoch": 0.37066554554261794, "grad_norm": 6.358206370988475, "learning_rate": 7.253355293685137e-06, "loss": 17.6039, "step": 20278 }, { "epoch": 0.37068382473906447, "grad_norm": 6.083152868347161, "learning_rate": 7.2530910431698876e-06, "loss": 17.4665, "step": 20279 }, { "epoch": 0.370702103935511, "grad_norm": 8.2756852119704, "learning_rate": 7.252826784757747e-06, "loss": 18.5358, "step": 20280 }, { "epoch": 0.37072038313195754, "grad_norm": 6.258825911969038, "learning_rate": 7.252562518449646e-06, "loss": 17.2119, "step": 20281 }, { "epoch": 0.370738662328404, "grad_norm": 7.742175015108623, "learning_rate": 7.252298244246507e-06, "loss": 17.6524, "step": 20282 }, { "epoch": 0.37075694152485056, "grad_norm": 7.474971190019222, "learning_rate": 7.252033962149259e-06, "loss": 17.9957, "step": 20283 }, { "epoch": 0.3707752207212971, "grad_norm": 5.816660347635074, "learning_rate": 7.251769672158828e-06, "loss": 17.118, "step": 20284 }, { "epoch": 0.37079349991774363, "grad_norm": 5.944782867154556, "learning_rate": 7.25150537427614e-06, "loss": 17.4711, "step": 20285 }, { "epoch": 0.37081177911419017, "grad_norm": 7.211325359244409, "learning_rate": 7.251241068502121e-06, "loss": 17.7895, "step": 20286 }, { "epoch": 0.37083005831063665, "grad_norm": 7.37474166400793, "learning_rate": 7.250976754837695e-06, "loss": 17.4837, "step": 20287 }, { "epoch": 0.3708483375070832, "grad_norm": 6.2830496208268, "learning_rate": 7.250712433283793e-06, "loss": 17.4363, "step": 20288 }, { "epoch": 0.3708666167035297, "grad_norm": 8.058595380782732, "learning_rate": 7.250448103841339e-06, "loss": 18.1716, "step": 20289 }, { "epoch": 0.37088489589997625, "grad_norm": 5.1949873569851635, "learning_rate": 7.250183766511259e-06, "loss": 17.067, "step": 20290 }, { "epoch": 0.3709031750964228, "grad_norm": 7.460335858185023, "learning_rate": 7.249919421294481e-06, "loss": 17.7925, "step": 20291 }, { "epoch": 0.37092145429286927, "grad_norm": 8.332337387334707, "learning_rate": 7.2496550681919295e-06, "loss": 18.2483, "step": 20292 }, { "epoch": 0.3709397334893158, "grad_norm": 5.447324642704994, "learning_rate": 7.249390707204533e-06, "loss": 17.0831, "step": 20293 }, { "epoch": 0.37095801268576234, "grad_norm": 6.048356023286317, "learning_rate": 7.249126338333218e-06, "loss": 17.4379, "step": 20294 }, { "epoch": 0.3709762918822089, "grad_norm": 6.806845187645089, "learning_rate": 7.2488619615789095e-06, "loss": 17.7563, "step": 20295 }, { "epoch": 0.37099457107865536, "grad_norm": 5.968497371252764, "learning_rate": 7.248597576942534e-06, "loss": 17.3476, "step": 20296 }, { "epoch": 0.3710128502751019, "grad_norm": 7.537556683206155, "learning_rate": 7.248333184425021e-06, "loss": 17.8649, "step": 20297 }, { "epoch": 0.3710311294715484, "grad_norm": 6.298372576893895, "learning_rate": 7.2480687840272935e-06, "loss": 17.2882, "step": 20298 }, { "epoch": 0.37104940866799496, "grad_norm": 6.362553137479139, "learning_rate": 7.247804375750281e-06, "loss": 17.6443, "step": 20299 }, { "epoch": 0.3710676878644415, "grad_norm": 4.55298761071668, "learning_rate": 7.2475399595949105e-06, "loss": 16.6209, "step": 20300 }, { "epoch": 0.371085967060888, "grad_norm": 5.750610391656214, "learning_rate": 7.2472755355621045e-06, "loss": 17.1992, "step": 20301 }, { "epoch": 0.3711042462573345, "grad_norm": 6.369967866465619, "learning_rate": 7.247011103652794e-06, "loss": 17.8721, "step": 20302 }, { "epoch": 0.37112252545378105, "grad_norm": 7.08208857214891, "learning_rate": 7.246746663867906e-06, "loss": 17.9226, "step": 20303 }, { "epoch": 0.3711408046502276, "grad_norm": 6.477355633600419, "learning_rate": 7.246482216208365e-06, "loss": 17.4098, "step": 20304 }, { "epoch": 0.3711590838466741, "grad_norm": 6.497567825869114, "learning_rate": 7.246217760675098e-06, "loss": 17.623, "step": 20305 }, { "epoch": 0.3711773630431206, "grad_norm": 6.477044126668947, "learning_rate": 7.245953297269033e-06, "loss": 17.3895, "step": 20306 }, { "epoch": 0.37119564223956714, "grad_norm": 5.643879089142931, "learning_rate": 7.2456888259910975e-06, "loss": 16.9544, "step": 20307 }, { "epoch": 0.37121392143601367, "grad_norm": 6.8606448765339945, "learning_rate": 7.245424346842217e-06, "loss": 17.5219, "step": 20308 }, { "epoch": 0.3712322006324602, "grad_norm": 7.416814314734445, "learning_rate": 7.2451598598233184e-06, "loss": 17.3286, "step": 20309 }, { "epoch": 0.37125047982890674, "grad_norm": 7.233693659450961, "learning_rate": 7.244895364935329e-06, "loss": 17.6946, "step": 20310 }, { "epoch": 0.3712687590253532, "grad_norm": 6.991765959399882, "learning_rate": 7.244630862179178e-06, "loss": 17.9366, "step": 20311 }, { "epoch": 0.37128703822179976, "grad_norm": 5.725040707089489, "learning_rate": 7.244366351555789e-06, "loss": 17.3533, "step": 20312 }, { "epoch": 0.3713053174182463, "grad_norm": 5.299920044235636, "learning_rate": 7.244101833066093e-06, "loss": 16.9887, "step": 20313 }, { "epoch": 0.37132359661469283, "grad_norm": 6.651243925486612, "learning_rate": 7.243837306711011e-06, "loss": 17.7768, "step": 20314 }, { "epoch": 0.37134187581113937, "grad_norm": 6.195642278522273, "learning_rate": 7.243572772491476e-06, "loss": 17.2241, "step": 20315 }, { "epoch": 0.37136015500758585, "grad_norm": 7.474750560470644, "learning_rate": 7.243308230408413e-06, "loss": 17.6936, "step": 20316 }, { "epoch": 0.3713784342040324, "grad_norm": 6.453715122510833, "learning_rate": 7.243043680462751e-06, "loss": 17.6156, "step": 20317 }, { "epoch": 0.3713967134004789, "grad_norm": 4.693457419653771, "learning_rate": 7.2427791226554136e-06, "loss": 16.9029, "step": 20318 }, { "epoch": 0.37141499259692545, "grad_norm": 7.411761556224165, "learning_rate": 7.24251455698733e-06, "loss": 17.5067, "step": 20319 }, { "epoch": 0.371433271793372, "grad_norm": 6.894335782006021, "learning_rate": 7.242249983459429e-06, "loss": 17.7946, "step": 20320 }, { "epoch": 0.37145155098981847, "grad_norm": 8.265264598175682, "learning_rate": 7.241985402072634e-06, "loss": 17.5989, "step": 20321 }, { "epoch": 0.371469830186265, "grad_norm": 6.0522965571151515, "learning_rate": 7.241720812827876e-06, "loss": 17.4702, "step": 20322 }, { "epoch": 0.37148810938271154, "grad_norm": 5.793673229110889, "learning_rate": 7.241456215726082e-06, "loss": 17.4635, "step": 20323 }, { "epoch": 0.3715063885791581, "grad_norm": 6.144375277382662, "learning_rate": 7.241191610768177e-06, "loss": 17.5922, "step": 20324 }, { "epoch": 0.3715246677756046, "grad_norm": 5.6056628885740265, "learning_rate": 7.24092699795509e-06, "loss": 17.2037, "step": 20325 }, { "epoch": 0.3715429469720511, "grad_norm": 5.406300344384662, "learning_rate": 7.240662377287748e-06, "loss": 17.1828, "step": 20326 }, { "epoch": 0.3715612261684976, "grad_norm": 7.063140556306277, "learning_rate": 7.240397748767081e-06, "loss": 17.6718, "step": 20327 }, { "epoch": 0.37157950536494416, "grad_norm": 7.660404107492753, "learning_rate": 7.240133112394012e-06, "loss": 18.1827, "step": 20328 }, { "epoch": 0.3715977845613907, "grad_norm": 6.445982472041131, "learning_rate": 7.239868468169471e-06, "loss": 17.5548, "step": 20329 }, { "epoch": 0.3716160637578372, "grad_norm": 6.112940911948984, "learning_rate": 7.239603816094387e-06, "loss": 17.2363, "step": 20330 }, { "epoch": 0.3716343429542837, "grad_norm": 5.734139377635783, "learning_rate": 7.239339156169686e-06, "loss": 17.1976, "step": 20331 }, { "epoch": 0.37165262215073025, "grad_norm": 5.476184692509491, "learning_rate": 7.239074488396294e-06, "loss": 17.2108, "step": 20332 }, { "epoch": 0.3716709013471768, "grad_norm": 6.127131572197858, "learning_rate": 7.238809812775139e-06, "loss": 17.3148, "step": 20333 }, { "epoch": 0.3716891805436233, "grad_norm": 7.332164080361569, "learning_rate": 7.238545129307153e-06, "loss": 17.7033, "step": 20334 }, { "epoch": 0.3717074597400698, "grad_norm": 6.19730544508833, "learning_rate": 7.2382804379932595e-06, "loss": 17.3996, "step": 20335 }, { "epoch": 0.37172573893651634, "grad_norm": 7.430300612267093, "learning_rate": 7.238015738834388e-06, "loss": 18.2496, "step": 20336 }, { "epoch": 0.3717440181329629, "grad_norm": 6.067064644869495, "learning_rate": 7.237751031831464e-06, "loss": 17.2441, "step": 20337 }, { "epoch": 0.3717622973294094, "grad_norm": 6.331459560734785, "learning_rate": 7.2374863169854175e-06, "loss": 17.3189, "step": 20338 }, { "epoch": 0.37178057652585594, "grad_norm": 5.788864823371587, "learning_rate": 7.237221594297175e-06, "loss": 17.0798, "step": 20339 }, { "epoch": 0.3717988557223024, "grad_norm": 6.677458964288095, "learning_rate": 7.236956863767665e-06, "loss": 17.7383, "step": 20340 }, { "epoch": 0.37181713491874896, "grad_norm": 6.999870699390516, "learning_rate": 7.2366921253978165e-06, "loss": 17.6734, "step": 20341 }, { "epoch": 0.3718354141151955, "grad_norm": 8.180484547018644, "learning_rate": 7.236427379188556e-06, "loss": 18.4928, "step": 20342 }, { "epoch": 0.37185369331164203, "grad_norm": 7.760925417708203, "learning_rate": 7.2361626251408105e-06, "loss": 18.3223, "step": 20343 }, { "epoch": 0.37187197250808857, "grad_norm": 6.124893382275461, "learning_rate": 7.235897863255509e-06, "loss": 17.5096, "step": 20344 }, { "epoch": 0.37189025170453505, "grad_norm": 5.905706033508136, "learning_rate": 7.23563309353358e-06, "loss": 17.3197, "step": 20345 }, { "epoch": 0.3719085309009816, "grad_norm": 6.6805730546934505, "learning_rate": 7.235368315975951e-06, "loss": 17.679, "step": 20346 }, { "epoch": 0.3719268100974281, "grad_norm": 6.794605647027376, "learning_rate": 7.23510353058355e-06, "loss": 17.8838, "step": 20347 }, { "epoch": 0.37194508929387465, "grad_norm": 5.693706186866842, "learning_rate": 7.234838737357306e-06, "loss": 17.549, "step": 20348 }, { "epoch": 0.3719633684903212, "grad_norm": 7.74429790113914, "learning_rate": 7.234573936298146e-06, "loss": 17.9132, "step": 20349 }, { "epoch": 0.37198164768676767, "grad_norm": 7.1510684608011825, "learning_rate": 7.234309127406998e-06, "loss": 17.5058, "step": 20350 }, { "epoch": 0.3719999268832142, "grad_norm": 5.94800289132471, "learning_rate": 7.234044310684789e-06, "loss": 17.368, "step": 20351 }, { "epoch": 0.37201820607966074, "grad_norm": 7.601307345453096, "learning_rate": 7.233779486132451e-06, "loss": 17.7545, "step": 20352 }, { "epoch": 0.3720364852761073, "grad_norm": 5.93708836301738, "learning_rate": 7.233514653750907e-06, "loss": 17.4703, "step": 20353 }, { "epoch": 0.3720547644725538, "grad_norm": 6.089745149104948, "learning_rate": 7.23324981354109e-06, "loss": 17.1858, "step": 20354 }, { "epoch": 0.3720730436690003, "grad_norm": 5.428649211988015, "learning_rate": 7.232984965503925e-06, "loss": 17.0951, "step": 20355 }, { "epoch": 0.3720913228654468, "grad_norm": 7.580011344726705, "learning_rate": 7.232720109640342e-06, "loss": 17.7734, "step": 20356 }, { "epoch": 0.37210960206189336, "grad_norm": 7.538724121483692, "learning_rate": 7.232455245951269e-06, "loss": 17.9085, "step": 20357 }, { "epoch": 0.3721278812583399, "grad_norm": 6.5290370401590705, "learning_rate": 7.232190374437634e-06, "loss": 17.5595, "step": 20358 }, { "epoch": 0.37214616045478643, "grad_norm": 7.6163367269095374, "learning_rate": 7.231925495100365e-06, "loss": 18.1394, "step": 20359 }, { "epoch": 0.3721644396512329, "grad_norm": 7.053694094974511, "learning_rate": 7.231660607940391e-06, "loss": 17.8886, "step": 20360 }, { "epoch": 0.37218271884767945, "grad_norm": 5.9060435750609335, "learning_rate": 7.23139571295864e-06, "loss": 17.3318, "step": 20361 }, { "epoch": 0.372200998044126, "grad_norm": 7.689619564427998, "learning_rate": 7.231130810156042e-06, "loss": 17.9838, "step": 20362 }, { "epoch": 0.3722192772405725, "grad_norm": 8.4709035566881, "learning_rate": 7.230865899533522e-06, "loss": 17.8392, "step": 20363 }, { "epoch": 0.372237556437019, "grad_norm": 5.693540796749584, "learning_rate": 7.230600981092012e-06, "loss": 17.215, "step": 20364 }, { "epoch": 0.37225583563346554, "grad_norm": 5.698863212612251, "learning_rate": 7.230336054832438e-06, "loss": 17.0566, "step": 20365 }, { "epoch": 0.3722741148299121, "grad_norm": 7.070801136228155, "learning_rate": 7.230071120755732e-06, "loss": 17.548, "step": 20366 }, { "epoch": 0.3722923940263586, "grad_norm": 8.326772885066012, "learning_rate": 7.229806178862818e-06, "loss": 17.5586, "step": 20367 }, { "epoch": 0.37231067322280514, "grad_norm": 5.982651802352122, "learning_rate": 7.229541229154627e-06, "loss": 17.277, "step": 20368 }, { "epoch": 0.3723289524192516, "grad_norm": 6.496205803133922, "learning_rate": 7.2292762716320886e-06, "loss": 17.6187, "step": 20369 }, { "epoch": 0.37234723161569816, "grad_norm": 6.161779591690141, "learning_rate": 7.229011306296129e-06, "loss": 17.5687, "step": 20370 }, { "epoch": 0.3723655108121447, "grad_norm": 7.006544113911659, "learning_rate": 7.2287463331476795e-06, "loss": 17.7012, "step": 20371 }, { "epoch": 0.37238379000859123, "grad_norm": 5.95469046391852, "learning_rate": 7.228481352187668e-06, "loss": 17.1852, "step": 20372 }, { "epoch": 0.37240206920503777, "grad_norm": 8.285553807565886, "learning_rate": 7.2282163634170196e-06, "loss": 17.919, "step": 20373 }, { "epoch": 0.37242034840148425, "grad_norm": 6.3174512701254, "learning_rate": 7.2279513668366696e-06, "loss": 17.2932, "step": 20374 }, { "epoch": 0.3724386275979308, "grad_norm": 5.365878806247166, "learning_rate": 7.2276863624475414e-06, "loss": 17.0177, "step": 20375 }, { "epoch": 0.3724569067943773, "grad_norm": 7.849594451486141, "learning_rate": 7.227421350250568e-06, "loss": 18.0845, "step": 20376 }, { "epoch": 0.37247518599082385, "grad_norm": 7.173790348339584, "learning_rate": 7.227156330246674e-06, "loss": 17.7624, "step": 20377 }, { "epoch": 0.3724934651872704, "grad_norm": 6.732504055398928, "learning_rate": 7.226891302436789e-06, "loss": 17.5077, "step": 20378 }, { "epoch": 0.37251174438371687, "grad_norm": 7.776751079739892, "learning_rate": 7.226626266821847e-06, "loss": 17.9832, "step": 20379 }, { "epoch": 0.3725300235801634, "grad_norm": 7.164123401789497, "learning_rate": 7.226361223402771e-06, "loss": 17.8237, "step": 20380 }, { "epoch": 0.37254830277660994, "grad_norm": 5.495777387559704, "learning_rate": 7.226096172180492e-06, "loss": 17.0427, "step": 20381 }, { "epoch": 0.3725665819730565, "grad_norm": 7.497811476326853, "learning_rate": 7.225831113155939e-06, "loss": 17.7845, "step": 20382 }, { "epoch": 0.372584861169503, "grad_norm": 6.126258756148357, "learning_rate": 7.225566046330041e-06, "loss": 17.292, "step": 20383 }, { "epoch": 0.3726031403659495, "grad_norm": 7.5891907319413425, "learning_rate": 7.225300971703728e-06, "loss": 17.7541, "step": 20384 }, { "epoch": 0.37262141956239603, "grad_norm": 5.681311559330811, "learning_rate": 7.225035889277928e-06, "loss": 17.2156, "step": 20385 }, { "epoch": 0.37263969875884256, "grad_norm": 5.389113341989961, "learning_rate": 7.224770799053571e-06, "loss": 17.2472, "step": 20386 }, { "epoch": 0.3726579779552891, "grad_norm": 7.119291474765218, "learning_rate": 7.224505701031584e-06, "loss": 17.9081, "step": 20387 }, { "epoch": 0.37267625715173563, "grad_norm": 7.550679998764515, "learning_rate": 7.224240595212898e-06, "loss": 17.8024, "step": 20388 }, { "epoch": 0.3726945363481821, "grad_norm": 7.120317814639238, "learning_rate": 7.223975481598443e-06, "loss": 17.6353, "step": 20389 }, { "epoch": 0.37271281554462865, "grad_norm": 6.534661234696521, "learning_rate": 7.223710360189145e-06, "loss": 17.6243, "step": 20390 }, { "epoch": 0.3727310947410752, "grad_norm": 6.674445844875846, "learning_rate": 7.223445230985936e-06, "loss": 17.8813, "step": 20391 }, { "epoch": 0.3727493739375217, "grad_norm": 6.958357293000667, "learning_rate": 7.223180093989743e-06, "loss": 17.6807, "step": 20392 }, { "epoch": 0.37276765313396826, "grad_norm": 6.307849016021199, "learning_rate": 7.2229149492015e-06, "loss": 17.414, "step": 20393 }, { "epoch": 0.37278593233041474, "grad_norm": 9.402225556016122, "learning_rate": 7.2226497966221295e-06, "loss": 18.8113, "step": 20394 }, { "epoch": 0.3728042115268613, "grad_norm": 6.158146935391387, "learning_rate": 7.222384636252566e-06, "loss": 17.4431, "step": 20395 }, { "epoch": 0.3728224907233078, "grad_norm": 8.180275466524071, "learning_rate": 7.2221194680937375e-06, "loss": 18.4592, "step": 20396 }, { "epoch": 0.37284076991975434, "grad_norm": 6.246114860222931, "learning_rate": 7.221854292146573e-06, "loss": 17.2941, "step": 20397 }, { "epoch": 0.3728590491162008, "grad_norm": 6.15569543233691, "learning_rate": 7.221589108412001e-06, "loss": 17.5038, "step": 20398 }, { "epoch": 0.37287732831264736, "grad_norm": 7.643497344732373, "learning_rate": 7.221323916890952e-06, "loss": 17.9917, "step": 20399 }, { "epoch": 0.3728956075090939, "grad_norm": 6.504358251889061, "learning_rate": 7.221058717584357e-06, "loss": 17.5867, "step": 20400 }, { "epoch": 0.37291388670554043, "grad_norm": 6.410339312678638, "learning_rate": 7.2207935104931425e-06, "loss": 17.5052, "step": 20401 }, { "epoch": 0.37293216590198697, "grad_norm": 8.280785292636546, "learning_rate": 7.22052829561824e-06, "loss": 17.466, "step": 20402 }, { "epoch": 0.37295044509843345, "grad_norm": 6.071215727603237, "learning_rate": 7.2202630729605794e-06, "loss": 17.3962, "step": 20403 }, { "epoch": 0.37296872429488, "grad_norm": 7.037387699823428, "learning_rate": 7.219997842521088e-06, "loss": 18.0676, "step": 20404 }, { "epoch": 0.3729870034913265, "grad_norm": 6.362228304268196, "learning_rate": 7.2197326043006965e-06, "loss": 17.452, "step": 20405 }, { "epoch": 0.37300528268777305, "grad_norm": 4.923762259060005, "learning_rate": 7.219467358300335e-06, "loss": 16.9562, "step": 20406 }, { "epoch": 0.3730235618842196, "grad_norm": 5.680735934454968, "learning_rate": 7.219202104520935e-06, "loss": 17.1766, "step": 20407 }, { "epoch": 0.37304184108066607, "grad_norm": 6.010009134758298, "learning_rate": 7.218936842963422e-06, "loss": 17.3163, "step": 20408 }, { "epoch": 0.3730601202771126, "grad_norm": 6.572640723154287, "learning_rate": 7.218671573628729e-06, "loss": 17.5496, "step": 20409 }, { "epoch": 0.37307839947355914, "grad_norm": 5.927443830371127, "learning_rate": 7.218406296517785e-06, "loss": 17.3753, "step": 20410 }, { "epoch": 0.3730966786700057, "grad_norm": 7.44410725698587, "learning_rate": 7.218141011631518e-06, "loss": 17.9294, "step": 20411 }, { "epoch": 0.3731149578664522, "grad_norm": 5.054380719914831, "learning_rate": 7.21787571897086e-06, "loss": 16.8668, "step": 20412 }, { "epoch": 0.3731332370628987, "grad_norm": 6.755970480299233, "learning_rate": 7.21761041853674e-06, "loss": 17.6978, "step": 20413 }, { "epoch": 0.37315151625934523, "grad_norm": 6.60742547686864, "learning_rate": 7.217345110330088e-06, "loss": 17.3586, "step": 20414 }, { "epoch": 0.37316979545579176, "grad_norm": 6.517868095519433, "learning_rate": 7.217079794351833e-06, "loss": 17.6383, "step": 20415 }, { "epoch": 0.3731880746522383, "grad_norm": 7.533480740334832, "learning_rate": 7.216814470602907e-06, "loss": 18.0985, "step": 20416 }, { "epoch": 0.37320635384868484, "grad_norm": 6.71196017452529, "learning_rate": 7.216549139084239e-06, "loss": 17.6497, "step": 20417 }, { "epoch": 0.3732246330451313, "grad_norm": 5.921607526423101, "learning_rate": 7.216283799796758e-06, "loss": 17.4144, "step": 20418 }, { "epoch": 0.37324291224157785, "grad_norm": 8.118812396137619, "learning_rate": 7.216018452741393e-06, "loss": 18.1792, "step": 20419 }, { "epoch": 0.3732611914380244, "grad_norm": 8.533555423039589, "learning_rate": 7.215753097919078e-06, "loss": 18.1658, "step": 20420 }, { "epoch": 0.3732794706344709, "grad_norm": 9.005233588612297, "learning_rate": 7.215487735330739e-06, "loss": 18.7296, "step": 20421 }, { "epoch": 0.37329774983091746, "grad_norm": 7.1531783299493465, "learning_rate": 7.215222364977309e-06, "loss": 17.8506, "step": 20422 }, { "epoch": 0.37331602902736394, "grad_norm": 6.62261073381144, "learning_rate": 7.2149569868597156e-06, "loss": 17.3995, "step": 20423 }, { "epoch": 0.3733343082238105, "grad_norm": 7.694517476480957, "learning_rate": 7.214691600978891e-06, "loss": 18.037, "step": 20424 }, { "epoch": 0.373352587420257, "grad_norm": 5.286063429211789, "learning_rate": 7.214426207335765e-06, "loss": 16.9748, "step": 20425 }, { "epoch": 0.37337086661670355, "grad_norm": 6.853043960827987, "learning_rate": 7.2141608059312665e-06, "loss": 17.6959, "step": 20426 }, { "epoch": 0.3733891458131501, "grad_norm": 7.6643730515163275, "learning_rate": 7.213895396766327e-06, "loss": 17.8825, "step": 20427 }, { "epoch": 0.37340742500959656, "grad_norm": 5.409856853628547, "learning_rate": 7.213629979841875e-06, "loss": 16.9521, "step": 20428 }, { "epoch": 0.3734257042060431, "grad_norm": 5.500316520841786, "learning_rate": 7.213364555158843e-06, "loss": 17.1085, "step": 20429 }, { "epoch": 0.37344398340248963, "grad_norm": 6.484131949159568, "learning_rate": 7.21309912271816e-06, "loss": 17.4931, "step": 20430 }, { "epoch": 0.37346226259893617, "grad_norm": 7.160145232373765, "learning_rate": 7.212833682520758e-06, "loss": 17.8472, "step": 20431 }, { "epoch": 0.37348054179538265, "grad_norm": 8.808054033558152, "learning_rate": 7.212568234567563e-06, "loss": 17.7341, "step": 20432 }, { "epoch": 0.3734988209918292, "grad_norm": 7.0031468822222935, "learning_rate": 7.21230277885951e-06, "loss": 17.9265, "step": 20433 }, { "epoch": 0.3735171001882757, "grad_norm": 6.6477486261601335, "learning_rate": 7.212037315397528e-06, "loss": 17.5194, "step": 20434 }, { "epoch": 0.37353537938472225, "grad_norm": 6.296791146161914, "learning_rate": 7.2117718441825475e-06, "loss": 17.4754, "step": 20435 }, { "epoch": 0.3735536585811688, "grad_norm": 5.909369086372829, "learning_rate": 7.211506365215499e-06, "loss": 17.3268, "step": 20436 }, { "epoch": 0.37357193777761527, "grad_norm": 7.5901947275425625, "learning_rate": 7.21124087849731e-06, "loss": 17.8533, "step": 20437 }, { "epoch": 0.3735902169740618, "grad_norm": 6.5267304256580365, "learning_rate": 7.210975384028917e-06, "loss": 17.5819, "step": 20438 }, { "epoch": 0.37360849617050834, "grad_norm": 8.301980468284086, "learning_rate": 7.210709881811245e-06, "loss": 18.2158, "step": 20439 }, { "epoch": 0.3736267753669549, "grad_norm": 6.438116920575476, "learning_rate": 7.210444371845227e-06, "loss": 17.5852, "step": 20440 }, { "epoch": 0.3736450545634014, "grad_norm": 5.890955174083961, "learning_rate": 7.210178854131793e-06, "loss": 17.1586, "step": 20441 }, { "epoch": 0.3736633337598479, "grad_norm": 6.658707114323633, "learning_rate": 7.2099133286718744e-06, "loss": 17.5105, "step": 20442 }, { "epoch": 0.37368161295629443, "grad_norm": 7.20389531042741, "learning_rate": 7.209647795466401e-06, "loss": 17.5541, "step": 20443 }, { "epoch": 0.37369989215274096, "grad_norm": 6.692437785833514, "learning_rate": 7.209382254516304e-06, "loss": 17.5992, "step": 20444 }, { "epoch": 0.3737181713491875, "grad_norm": 7.156500761876589, "learning_rate": 7.209116705822516e-06, "loss": 17.7442, "step": 20445 }, { "epoch": 0.37373645054563404, "grad_norm": 10.273160095679948, "learning_rate": 7.208851149385963e-06, "loss": 17.881, "step": 20446 }, { "epoch": 0.3737547297420805, "grad_norm": 7.652218591225395, "learning_rate": 7.208585585207578e-06, "loss": 18.0469, "step": 20447 }, { "epoch": 0.37377300893852705, "grad_norm": 6.983571133024348, "learning_rate": 7.208320013288295e-06, "loss": 17.4955, "step": 20448 }, { "epoch": 0.3737912881349736, "grad_norm": 8.059806225719282, "learning_rate": 7.2080544336290395e-06, "loss": 18.0089, "step": 20449 }, { "epoch": 0.3738095673314201, "grad_norm": 7.542634018960026, "learning_rate": 7.2077888462307456e-06, "loss": 18.0137, "step": 20450 }, { "epoch": 0.37382784652786666, "grad_norm": 5.932502408738489, "learning_rate": 7.207523251094344e-06, "loss": 17.358, "step": 20451 }, { "epoch": 0.37384612572431314, "grad_norm": 6.534574091013455, "learning_rate": 7.207257648220763e-06, "loss": 17.3377, "step": 20452 }, { "epoch": 0.3738644049207597, "grad_norm": 5.875549277401524, "learning_rate": 7.206992037610937e-06, "loss": 17.3167, "step": 20453 }, { "epoch": 0.3738826841172062, "grad_norm": 7.025478727291052, "learning_rate": 7.206726419265795e-06, "loss": 17.4492, "step": 20454 }, { "epoch": 0.37390096331365275, "grad_norm": 6.612097043680163, "learning_rate": 7.206460793186268e-06, "loss": 17.4482, "step": 20455 }, { "epoch": 0.3739192425100993, "grad_norm": 6.267549369740469, "learning_rate": 7.206195159373288e-06, "loss": 17.2456, "step": 20456 }, { "epoch": 0.37393752170654576, "grad_norm": 6.401926259821656, "learning_rate": 7.205929517827785e-06, "loss": 17.4843, "step": 20457 }, { "epoch": 0.3739558009029923, "grad_norm": 6.560521431492206, "learning_rate": 7.205663868550693e-06, "loss": 17.5084, "step": 20458 }, { "epoch": 0.37397408009943883, "grad_norm": 5.626893536183918, "learning_rate": 7.205398211542938e-06, "loss": 17.1795, "step": 20459 }, { "epoch": 0.37399235929588537, "grad_norm": 5.339963679579897, "learning_rate": 7.205132546805454e-06, "loss": 17.0069, "step": 20460 }, { "epoch": 0.3740106384923319, "grad_norm": 7.4430745967816385, "learning_rate": 7.204866874339172e-06, "loss": 17.921, "step": 20461 }, { "epoch": 0.3740289176887784, "grad_norm": 5.656506238986444, "learning_rate": 7.2046011941450225e-06, "loss": 17.4381, "step": 20462 }, { "epoch": 0.3740471968852249, "grad_norm": 6.401459680619209, "learning_rate": 7.204335506223937e-06, "loss": 17.2807, "step": 20463 }, { "epoch": 0.37406547608167146, "grad_norm": 6.163360997915802, "learning_rate": 7.204069810576848e-06, "loss": 17.3161, "step": 20464 }, { "epoch": 0.374083755278118, "grad_norm": 6.956249254360118, "learning_rate": 7.203804107204684e-06, "loss": 17.3061, "step": 20465 }, { "epoch": 0.37410203447456447, "grad_norm": 7.914146130191101, "learning_rate": 7.203538396108378e-06, "loss": 18.0533, "step": 20466 }, { "epoch": 0.374120313671011, "grad_norm": 6.022921740089152, "learning_rate": 7.203272677288863e-06, "loss": 17.1129, "step": 20467 }, { "epoch": 0.37413859286745754, "grad_norm": 5.345696553436056, "learning_rate": 7.2030069507470665e-06, "loss": 17.0964, "step": 20468 }, { "epoch": 0.3741568720639041, "grad_norm": 5.8767713802306165, "learning_rate": 7.202741216483923e-06, "loss": 17.344, "step": 20469 }, { "epoch": 0.3741751512603506, "grad_norm": 7.090472714638295, "learning_rate": 7.202475474500361e-06, "loss": 17.8596, "step": 20470 }, { "epoch": 0.3741934304567971, "grad_norm": 6.851878326223762, "learning_rate": 7.202209724797316e-06, "loss": 17.7297, "step": 20471 }, { "epoch": 0.37421170965324363, "grad_norm": 6.653875934548672, "learning_rate": 7.201943967375716e-06, "loss": 17.8149, "step": 20472 }, { "epoch": 0.37422998884969016, "grad_norm": 6.095453224164434, "learning_rate": 7.201678202236493e-06, "loss": 17.4917, "step": 20473 }, { "epoch": 0.3742482680461367, "grad_norm": 6.747793075429817, "learning_rate": 7.201412429380579e-06, "loss": 17.7392, "step": 20474 }, { "epoch": 0.37426654724258324, "grad_norm": 5.7077639193355285, "learning_rate": 7.201146648808906e-06, "loss": 17.2055, "step": 20475 }, { "epoch": 0.3742848264390297, "grad_norm": 5.588325452177898, "learning_rate": 7.200880860522405e-06, "loss": 17.1681, "step": 20476 }, { "epoch": 0.37430310563547625, "grad_norm": 7.116280070737461, "learning_rate": 7.2006150645220075e-06, "loss": 17.9519, "step": 20477 }, { "epoch": 0.3743213848319228, "grad_norm": 7.354552839910853, "learning_rate": 7.200349260808644e-06, "loss": 17.9828, "step": 20478 }, { "epoch": 0.3743396640283693, "grad_norm": 5.0117794482627955, "learning_rate": 7.200083449383248e-06, "loss": 17.0739, "step": 20479 }, { "epoch": 0.37435794322481586, "grad_norm": 6.110387257820735, "learning_rate": 7.199817630246751e-06, "loss": 17.245, "step": 20480 }, { "epoch": 0.37437622242126234, "grad_norm": 6.26316331195001, "learning_rate": 7.1995518034000836e-06, "loss": 17.5018, "step": 20481 }, { "epoch": 0.3743945016177089, "grad_norm": 5.416961702897103, "learning_rate": 7.199285968844178e-06, "loss": 17.0941, "step": 20482 }, { "epoch": 0.3744127808141554, "grad_norm": 7.296340194253948, "learning_rate": 7.199020126579966e-06, "loss": 18.0503, "step": 20483 }, { "epoch": 0.37443106001060195, "grad_norm": 8.16897791809317, "learning_rate": 7.19875427660838e-06, "loss": 18.1047, "step": 20484 }, { "epoch": 0.3744493392070485, "grad_norm": 5.496261649783343, "learning_rate": 7.1984884189303495e-06, "loss": 17.1197, "step": 20485 }, { "epoch": 0.37446761840349496, "grad_norm": 6.9872561177916594, "learning_rate": 7.19822255354681e-06, "loss": 17.7445, "step": 20486 }, { "epoch": 0.3744858975999415, "grad_norm": 5.224986596449498, "learning_rate": 7.197956680458689e-06, "loss": 16.9281, "step": 20487 }, { "epoch": 0.37450417679638803, "grad_norm": 5.579414671273383, "learning_rate": 7.197690799666921e-06, "loss": 17.0586, "step": 20488 }, { "epoch": 0.37452245599283457, "grad_norm": 6.510955389162127, "learning_rate": 7.197424911172439e-06, "loss": 17.3088, "step": 20489 }, { "epoch": 0.3745407351892811, "grad_norm": 7.3533702767898, "learning_rate": 7.197159014976172e-06, "loss": 17.8623, "step": 20490 }, { "epoch": 0.3745590143857276, "grad_norm": 6.424771511305801, "learning_rate": 7.196893111079054e-06, "loss": 17.471, "step": 20491 }, { "epoch": 0.3745772935821741, "grad_norm": 7.165256769964428, "learning_rate": 7.196627199482015e-06, "loss": 17.5574, "step": 20492 }, { "epoch": 0.37459557277862066, "grad_norm": 6.464879066796956, "learning_rate": 7.19636128018599e-06, "loss": 17.5617, "step": 20493 }, { "epoch": 0.3746138519750672, "grad_norm": 5.3519671094987675, "learning_rate": 7.196095353191909e-06, "loss": 16.9451, "step": 20494 }, { "epoch": 0.3746321311715137, "grad_norm": 7.342403173271185, "learning_rate": 7.195829418500704e-06, "loss": 17.6891, "step": 20495 }, { "epoch": 0.3746504103679602, "grad_norm": 7.397482331241367, "learning_rate": 7.195563476113306e-06, "loss": 17.8364, "step": 20496 }, { "epoch": 0.37466868956440674, "grad_norm": 7.601953773384147, "learning_rate": 7.19529752603065e-06, "loss": 17.7663, "step": 20497 }, { "epoch": 0.3746869687608533, "grad_norm": 6.155741447949556, "learning_rate": 7.195031568253667e-06, "loss": 17.4188, "step": 20498 }, { "epoch": 0.3747052479572998, "grad_norm": 8.528566994652131, "learning_rate": 7.194765602783288e-06, "loss": 18.6777, "step": 20499 }, { "epoch": 0.3747235271537463, "grad_norm": 5.999514942833209, "learning_rate": 7.194499629620446e-06, "loss": 17.3963, "step": 20500 }, { "epoch": 0.37474180635019283, "grad_norm": 5.988950183033299, "learning_rate": 7.194233648766073e-06, "loss": 17.3092, "step": 20501 }, { "epoch": 0.37476008554663937, "grad_norm": 6.778376306879646, "learning_rate": 7.193967660221103e-06, "loss": 17.6975, "step": 20502 }, { "epoch": 0.3747783647430859, "grad_norm": 6.825540751643079, "learning_rate": 7.1937016639864665e-06, "loss": 17.9962, "step": 20503 }, { "epoch": 0.37479664393953244, "grad_norm": 6.92133841962337, "learning_rate": 7.193435660063095e-06, "loss": 17.7729, "step": 20504 }, { "epoch": 0.3748149231359789, "grad_norm": 6.629127129797609, "learning_rate": 7.193169648451921e-06, "loss": 17.5761, "step": 20505 }, { "epoch": 0.37483320233242545, "grad_norm": 6.247274377170819, "learning_rate": 7.192903629153879e-06, "loss": 17.6489, "step": 20506 }, { "epoch": 0.374851481528872, "grad_norm": 4.864723505686175, "learning_rate": 7.192637602169901e-06, "loss": 16.8646, "step": 20507 }, { "epoch": 0.3748697607253185, "grad_norm": 7.351974906030962, "learning_rate": 7.192371567500917e-06, "loss": 17.8657, "step": 20508 }, { "epoch": 0.37488803992176506, "grad_norm": 6.279121183760727, "learning_rate": 7.192105525147861e-06, "loss": 17.553, "step": 20509 }, { "epoch": 0.37490631911821154, "grad_norm": 6.4870410422967, "learning_rate": 7.191839475111666e-06, "loss": 17.7006, "step": 20510 }, { "epoch": 0.3749245983146581, "grad_norm": 6.529975507119786, "learning_rate": 7.191573417393264e-06, "loss": 17.4445, "step": 20511 }, { "epoch": 0.3749428775111046, "grad_norm": 5.882872064621326, "learning_rate": 7.191307351993586e-06, "loss": 17.4472, "step": 20512 }, { "epoch": 0.37496115670755115, "grad_norm": 6.89285895607024, "learning_rate": 7.191041278913566e-06, "loss": 18.0626, "step": 20513 }, { "epoch": 0.3749794359039977, "grad_norm": 7.402257841543211, "learning_rate": 7.190775198154139e-06, "loss": 17.7354, "step": 20514 }, { "epoch": 0.37499771510044416, "grad_norm": 6.9256213267346975, "learning_rate": 7.190509109716232e-06, "loss": 17.7617, "step": 20515 }, { "epoch": 0.3750159942968907, "grad_norm": 6.055825591041182, "learning_rate": 7.190243013600782e-06, "loss": 16.9858, "step": 20516 }, { "epoch": 0.37503427349333723, "grad_norm": 6.9973535582198245, "learning_rate": 7.189976909808721e-06, "loss": 17.9019, "step": 20517 }, { "epoch": 0.37505255268978377, "grad_norm": 5.49296416870087, "learning_rate": 7.189710798340981e-06, "loss": 17.0977, "step": 20518 }, { "epoch": 0.3750708318862303, "grad_norm": 5.5940893216151295, "learning_rate": 7.189444679198492e-06, "loss": 17.1064, "step": 20519 }, { "epoch": 0.3750891110826768, "grad_norm": 6.803714787433599, "learning_rate": 7.189178552382192e-06, "loss": 17.8292, "step": 20520 }, { "epoch": 0.3751073902791233, "grad_norm": 6.706811302961145, "learning_rate": 7.18891241789301e-06, "loss": 17.8327, "step": 20521 }, { "epoch": 0.37512566947556986, "grad_norm": 6.087035417363352, "learning_rate": 7.188646275731881e-06, "loss": 17.5479, "step": 20522 }, { "epoch": 0.3751439486720164, "grad_norm": 5.825170628716262, "learning_rate": 7.188380125899736e-06, "loss": 17.5318, "step": 20523 }, { "epoch": 0.3751622278684629, "grad_norm": 7.386143292146687, "learning_rate": 7.188113968397508e-06, "loss": 17.8499, "step": 20524 }, { "epoch": 0.3751805070649094, "grad_norm": 5.670855151232415, "learning_rate": 7.1878478032261314e-06, "loss": 17.154, "step": 20525 }, { "epoch": 0.37519878626135594, "grad_norm": 6.059875690237442, "learning_rate": 7.187581630386538e-06, "loss": 17.3713, "step": 20526 }, { "epoch": 0.3752170654578025, "grad_norm": 5.732884623881086, "learning_rate": 7.187315449879659e-06, "loss": 17.1756, "step": 20527 }, { "epoch": 0.375235344654249, "grad_norm": 7.308732349846579, "learning_rate": 7.187049261706431e-06, "loss": 18.1432, "step": 20528 }, { "epoch": 0.37525362385069555, "grad_norm": 6.459090289358423, "learning_rate": 7.186783065867785e-06, "loss": 17.3665, "step": 20529 }, { "epoch": 0.37527190304714203, "grad_norm": 5.737021132272651, "learning_rate": 7.1865168623646546e-06, "loss": 17.4221, "step": 20530 }, { "epoch": 0.37529018224358857, "grad_norm": 6.279016688668446, "learning_rate": 7.186250651197971e-06, "loss": 17.5875, "step": 20531 }, { "epoch": 0.3753084614400351, "grad_norm": 7.238675373326088, "learning_rate": 7.185984432368669e-06, "loss": 17.5531, "step": 20532 }, { "epoch": 0.37532674063648164, "grad_norm": 5.654569050953152, "learning_rate": 7.185718205877681e-06, "loss": 17.1147, "step": 20533 }, { "epoch": 0.3753450198329281, "grad_norm": 6.4257274957213575, "learning_rate": 7.1854519717259416e-06, "loss": 17.4075, "step": 20534 }, { "epoch": 0.37536329902937465, "grad_norm": 6.637054571580879, "learning_rate": 7.185185729914383e-06, "loss": 17.5743, "step": 20535 }, { "epoch": 0.3753815782258212, "grad_norm": 7.637741798668585, "learning_rate": 7.184919480443936e-06, "loss": 17.9811, "step": 20536 }, { "epoch": 0.3753998574222677, "grad_norm": 6.407290892107951, "learning_rate": 7.184653223315535e-06, "loss": 17.5998, "step": 20537 }, { "epoch": 0.37541813661871426, "grad_norm": 7.749589762691165, "learning_rate": 7.184386958530117e-06, "loss": 17.7453, "step": 20538 }, { "epoch": 0.37543641581516074, "grad_norm": 6.905829872121333, "learning_rate": 7.184120686088612e-06, "loss": 17.4916, "step": 20539 }, { "epoch": 0.3754546950116073, "grad_norm": 5.7683542321228, "learning_rate": 7.183854405991952e-06, "loss": 17.1817, "step": 20540 }, { "epoch": 0.3754729742080538, "grad_norm": 5.981327578864603, "learning_rate": 7.183588118241072e-06, "loss": 17.3861, "step": 20541 }, { "epoch": 0.37549125340450035, "grad_norm": 7.5470328785879515, "learning_rate": 7.183321822836906e-06, "loss": 18.2527, "step": 20542 }, { "epoch": 0.3755095326009469, "grad_norm": 7.341944203848078, "learning_rate": 7.183055519780385e-06, "loss": 18.0454, "step": 20543 }, { "epoch": 0.37552781179739336, "grad_norm": 7.321986932501018, "learning_rate": 7.182789209072445e-06, "loss": 17.8002, "step": 20544 }, { "epoch": 0.3755460909938399, "grad_norm": 7.018625649196589, "learning_rate": 7.182522890714018e-06, "loss": 17.887, "step": 20545 }, { "epoch": 0.37556437019028643, "grad_norm": 6.703391618043084, "learning_rate": 7.182256564706039e-06, "loss": 17.6745, "step": 20546 }, { "epoch": 0.37558264938673297, "grad_norm": 7.104778415264163, "learning_rate": 7.181990231049437e-06, "loss": 17.6805, "step": 20547 }, { "epoch": 0.3756009285831795, "grad_norm": 8.684152435477817, "learning_rate": 7.181723889745151e-06, "loss": 18.46, "step": 20548 }, { "epoch": 0.375619207779626, "grad_norm": 7.3817123782854175, "learning_rate": 7.181457540794112e-06, "loss": 17.6573, "step": 20549 }, { "epoch": 0.3756374869760725, "grad_norm": 6.750336302395451, "learning_rate": 7.181191184197254e-06, "loss": 17.623, "step": 20550 }, { "epoch": 0.37565576617251906, "grad_norm": 5.881837910867297, "learning_rate": 7.180924819955508e-06, "loss": 17.1445, "step": 20551 }, { "epoch": 0.3756740453689656, "grad_norm": 7.534997164159258, "learning_rate": 7.180658448069811e-06, "loss": 17.9065, "step": 20552 }, { "epoch": 0.37569232456541213, "grad_norm": 9.022394311527165, "learning_rate": 7.180392068541095e-06, "loss": 18.0997, "step": 20553 }, { "epoch": 0.3757106037618586, "grad_norm": 7.275141752031852, "learning_rate": 7.180125681370296e-06, "loss": 17.596, "step": 20554 }, { "epoch": 0.37572888295830514, "grad_norm": 6.640547035363546, "learning_rate": 7.1798592865583425e-06, "loss": 17.6874, "step": 20555 }, { "epoch": 0.3757471621547517, "grad_norm": 5.768911845931946, "learning_rate": 7.179592884106174e-06, "loss": 17.4228, "step": 20556 }, { "epoch": 0.3757654413511982, "grad_norm": 6.99321745048219, "learning_rate": 7.179326474014721e-06, "loss": 17.7309, "step": 20557 }, { "epoch": 0.37578372054764475, "grad_norm": 7.839414507814109, "learning_rate": 7.179060056284917e-06, "loss": 17.9429, "step": 20558 }, { "epoch": 0.37580199974409123, "grad_norm": 5.920105089765995, "learning_rate": 7.178793630917696e-06, "loss": 17.2724, "step": 20559 }, { "epoch": 0.37582027894053777, "grad_norm": 5.438035983080217, "learning_rate": 7.178527197913994e-06, "loss": 17.1341, "step": 20560 }, { "epoch": 0.3758385581369843, "grad_norm": 6.888576926240088, "learning_rate": 7.178260757274742e-06, "loss": 17.8441, "step": 20561 }, { "epoch": 0.37585683733343084, "grad_norm": 6.94751405345727, "learning_rate": 7.177994309000876e-06, "loss": 17.1709, "step": 20562 }, { "epoch": 0.3758751165298774, "grad_norm": 7.568423208404413, "learning_rate": 7.1777278530933295e-06, "loss": 17.9601, "step": 20563 }, { "epoch": 0.37589339572632385, "grad_norm": 7.295671286163825, "learning_rate": 7.177461389553033e-06, "loss": 17.7052, "step": 20564 }, { "epoch": 0.3759116749227704, "grad_norm": 5.163917432288961, "learning_rate": 7.177194918380926e-06, "loss": 17.0495, "step": 20565 }, { "epoch": 0.3759299541192169, "grad_norm": 6.35829451976066, "learning_rate": 7.176928439577939e-06, "loss": 17.3866, "step": 20566 }, { "epoch": 0.37594823331566346, "grad_norm": 5.945944096813592, "learning_rate": 7.176661953145007e-06, "loss": 17.2316, "step": 20567 }, { "epoch": 0.37596651251210994, "grad_norm": 6.3620505848576006, "learning_rate": 7.176395459083063e-06, "loss": 17.3621, "step": 20568 }, { "epoch": 0.3759847917085565, "grad_norm": 8.249472346758372, "learning_rate": 7.1761289573930425e-06, "loss": 18.04, "step": 20569 }, { "epoch": 0.376003070905003, "grad_norm": 7.075767540520091, "learning_rate": 7.1758624480758776e-06, "loss": 17.421, "step": 20570 }, { "epoch": 0.37602135010144955, "grad_norm": 5.894654845925681, "learning_rate": 7.175595931132505e-06, "loss": 17.2072, "step": 20571 }, { "epoch": 0.3760396292978961, "grad_norm": 7.297821368762594, "learning_rate": 7.175329406563858e-06, "loss": 17.847, "step": 20572 }, { "epoch": 0.37605790849434256, "grad_norm": 7.334149966670582, "learning_rate": 7.175062874370868e-06, "loss": 17.9298, "step": 20573 }, { "epoch": 0.3760761876907891, "grad_norm": 6.938856531852848, "learning_rate": 7.174796334554473e-06, "loss": 17.8027, "step": 20574 }, { "epoch": 0.37609446688723563, "grad_norm": 5.923019276067725, "learning_rate": 7.174529787115605e-06, "loss": 17.2943, "step": 20575 }, { "epoch": 0.37611274608368217, "grad_norm": 6.37713460699952, "learning_rate": 7.174263232055198e-06, "loss": 17.4922, "step": 20576 }, { "epoch": 0.3761310252801287, "grad_norm": 6.8539348980493395, "learning_rate": 7.1739966693741894e-06, "loss": 17.7022, "step": 20577 }, { "epoch": 0.3761493044765752, "grad_norm": 7.476638742288511, "learning_rate": 7.1737300990735085e-06, "loss": 17.9302, "step": 20578 }, { "epoch": 0.3761675836730217, "grad_norm": 5.942880873420569, "learning_rate": 7.173463521154094e-06, "loss": 17.3077, "step": 20579 }, { "epoch": 0.37618586286946826, "grad_norm": 8.375990830522591, "learning_rate": 7.173196935616877e-06, "loss": 18.302, "step": 20580 }, { "epoch": 0.3762041420659148, "grad_norm": 6.2401076669671, "learning_rate": 7.172930342462795e-06, "loss": 17.3708, "step": 20581 }, { "epoch": 0.37622242126236133, "grad_norm": 5.788733961449134, "learning_rate": 7.17266374169278e-06, "loss": 17.364, "step": 20582 }, { "epoch": 0.3762407004588078, "grad_norm": 6.731599080527994, "learning_rate": 7.172397133307767e-06, "loss": 17.6133, "step": 20583 }, { "epoch": 0.37625897965525434, "grad_norm": 5.956999934736685, "learning_rate": 7.172130517308691e-06, "loss": 17.4725, "step": 20584 }, { "epoch": 0.3762772588517009, "grad_norm": 7.831714054322711, "learning_rate": 7.171863893696485e-06, "loss": 17.9058, "step": 20585 }, { "epoch": 0.3762955380481474, "grad_norm": 6.823454541730952, "learning_rate": 7.171597262472085e-06, "loss": 17.7316, "step": 20586 }, { "epoch": 0.37631381724459395, "grad_norm": 5.638160627563298, "learning_rate": 7.171330623636426e-06, "loss": 17.3128, "step": 20587 }, { "epoch": 0.37633209644104043, "grad_norm": 7.103100444250907, "learning_rate": 7.17106397719044e-06, "loss": 17.8114, "step": 20588 }, { "epoch": 0.37635037563748697, "grad_norm": 5.456728586507418, "learning_rate": 7.170797323135065e-06, "loss": 17.0995, "step": 20589 }, { "epoch": 0.3763686548339335, "grad_norm": 5.971389395298871, "learning_rate": 7.170530661471232e-06, "loss": 17.4495, "step": 20590 }, { "epoch": 0.37638693403038004, "grad_norm": 6.927627408702738, "learning_rate": 7.170263992199878e-06, "loss": 17.7794, "step": 20591 }, { "epoch": 0.3764052132268266, "grad_norm": 6.10809545916464, "learning_rate": 7.169997315321936e-06, "loss": 17.6278, "step": 20592 }, { "epoch": 0.37642349242327305, "grad_norm": 7.840387278475167, "learning_rate": 7.169730630838344e-06, "loss": 17.4599, "step": 20593 }, { "epoch": 0.3764417716197196, "grad_norm": 5.808072367256486, "learning_rate": 7.169463938750033e-06, "loss": 17.2614, "step": 20594 }, { "epoch": 0.3764600508161661, "grad_norm": 6.6618112247851675, "learning_rate": 7.169197239057939e-06, "loss": 17.4821, "step": 20595 }, { "epoch": 0.37647833001261266, "grad_norm": 6.159097442050367, "learning_rate": 7.168930531762998e-06, "loss": 17.3987, "step": 20596 }, { "epoch": 0.3764966092090592, "grad_norm": 5.5933978247039855, "learning_rate": 7.1686638168661425e-06, "loss": 17.1157, "step": 20597 }, { "epoch": 0.3765148884055057, "grad_norm": 6.900925113958627, "learning_rate": 7.168397094368309e-06, "loss": 17.7539, "step": 20598 }, { "epoch": 0.3765331676019522, "grad_norm": 6.619261728696811, "learning_rate": 7.168130364270431e-06, "loss": 17.4448, "step": 20599 }, { "epoch": 0.37655144679839875, "grad_norm": 7.609053670754561, "learning_rate": 7.167863626573446e-06, "loss": 18.3832, "step": 20600 }, { "epoch": 0.3765697259948453, "grad_norm": 5.803236237706797, "learning_rate": 7.167596881278285e-06, "loss": 17.4427, "step": 20601 }, { "epoch": 0.37658800519129176, "grad_norm": 7.273994144546687, "learning_rate": 7.167330128385886e-06, "loss": 17.775, "step": 20602 }, { "epoch": 0.3766062843877383, "grad_norm": 7.463503272074474, "learning_rate": 7.167063367897184e-06, "loss": 17.6611, "step": 20603 }, { "epoch": 0.37662456358418483, "grad_norm": 7.0457114727194945, "learning_rate": 7.1667965998131124e-06, "loss": 17.7985, "step": 20604 }, { "epoch": 0.37664284278063137, "grad_norm": 6.895245411245836, "learning_rate": 7.166529824134606e-06, "loss": 17.6429, "step": 20605 }, { "epoch": 0.3766611219770779, "grad_norm": 4.420823489714812, "learning_rate": 7.1662630408626e-06, "loss": 16.8286, "step": 20606 }, { "epoch": 0.3766794011735244, "grad_norm": 5.763056319838112, "learning_rate": 7.165996249998033e-06, "loss": 17.3129, "step": 20607 }, { "epoch": 0.3766976803699709, "grad_norm": 6.765938780753487, "learning_rate": 7.165729451541834e-06, "loss": 17.5902, "step": 20608 }, { "epoch": 0.37671595956641746, "grad_norm": 6.233005257901531, "learning_rate": 7.165462645494943e-06, "loss": 17.1538, "step": 20609 }, { "epoch": 0.376734238762864, "grad_norm": 6.994044793039864, "learning_rate": 7.165195831858293e-06, "loss": 17.6151, "step": 20610 }, { "epoch": 0.37675251795931053, "grad_norm": 4.5691847886265275, "learning_rate": 7.164929010632818e-06, "loss": 16.8158, "step": 20611 }, { "epoch": 0.376770797155757, "grad_norm": 5.113949836479145, "learning_rate": 7.164662181819456e-06, "loss": 16.9211, "step": 20612 }, { "epoch": 0.37678907635220354, "grad_norm": 6.731286088024672, "learning_rate": 7.164395345419141e-06, "loss": 17.5905, "step": 20613 }, { "epoch": 0.3768073555486501, "grad_norm": 5.779281914613035, "learning_rate": 7.164128501432808e-06, "loss": 17.4421, "step": 20614 }, { "epoch": 0.3768256347450966, "grad_norm": 7.339046452482347, "learning_rate": 7.163861649861392e-06, "loss": 18.3369, "step": 20615 }, { "epoch": 0.37684391394154315, "grad_norm": 6.884621453446428, "learning_rate": 7.163594790705829e-06, "loss": 17.8277, "step": 20616 }, { "epoch": 0.37686219313798963, "grad_norm": 5.655586210170619, "learning_rate": 7.163327923967055e-06, "loss": 17.1877, "step": 20617 }, { "epoch": 0.37688047233443617, "grad_norm": 8.771523681859211, "learning_rate": 7.163061049646003e-06, "loss": 18.0978, "step": 20618 }, { "epoch": 0.3768987515308827, "grad_norm": 5.9379482109307595, "learning_rate": 7.1627941677436085e-06, "loss": 17.5278, "step": 20619 }, { "epoch": 0.37691703072732924, "grad_norm": 5.382141824677348, "learning_rate": 7.162527278260811e-06, "loss": 17.233, "step": 20620 }, { "epoch": 0.3769353099237758, "grad_norm": 6.069812859379751, "learning_rate": 7.162260381198541e-06, "loss": 17.3415, "step": 20621 }, { "epoch": 0.37695358912022225, "grad_norm": 5.784579558492788, "learning_rate": 7.161993476557737e-06, "loss": 17.3677, "step": 20622 }, { "epoch": 0.3769718683166688, "grad_norm": 7.2001587613957065, "learning_rate": 7.161726564339333e-06, "loss": 18.0532, "step": 20623 }, { "epoch": 0.3769901475131153, "grad_norm": 6.8242809603122145, "learning_rate": 7.161459644544265e-06, "loss": 17.3681, "step": 20624 }, { "epoch": 0.37700842670956186, "grad_norm": 7.612123828849303, "learning_rate": 7.161192717173469e-06, "loss": 18.4676, "step": 20625 }, { "epoch": 0.3770267059060084, "grad_norm": 5.312600056096935, "learning_rate": 7.16092578222788e-06, "loss": 17.2113, "step": 20626 }, { "epoch": 0.3770449851024549, "grad_norm": 7.309212073447616, "learning_rate": 7.160658839708433e-06, "loss": 17.8747, "step": 20627 }, { "epoch": 0.3770632642989014, "grad_norm": 7.697322528872857, "learning_rate": 7.1603918896160655e-06, "loss": 18.2026, "step": 20628 }, { "epoch": 0.37708154349534795, "grad_norm": 9.588027842999113, "learning_rate": 7.160124931951711e-06, "loss": 18.5526, "step": 20629 }, { "epoch": 0.3770998226917945, "grad_norm": 5.977328879269785, "learning_rate": 7.1598579667163045e-06, "loss": 17.5158, "step": 20630 }, { "epoch": 0.377118101888241, "grad_norm": 6.09204584799851, "learning_rate": 7.1595909939107864e-06, "loss": 17.3506, "step": 20631 }, { "epoch": 0.3771363810846875, "grad_norm": 6.83840505088421, "learning_rate": 7.159324013536086e-06, "loss": 17.6528, "step": 20632 }, { "epoch": 0.37715466028113404, "grad_norm": 5.7778449402272765, "learning_rate": 7.159057025593145e-06, "loss": 17.2363, "step": 20633 }, { "epoch": 0.37717293947758057, "grad_norm": 6.64526238744069, "learning_rate": 7.158790030082896e-06, "loss": 17.5401, "step": 20634 }, { "epoch": 0.3771912186740271, "grad_norm": 5.803148609951093, "learning_rate": 7.158523027006275e-06, "loss": 17.439, "step": 20635 }, { "epoch": 0.3772094978704736, "grad_norm": 6.60449995898493, "learning_rate": 7.158256016364218e-06, "loss": 17.4628, "step": 20636 }, { "epoch": 0.3772277770669201, "grad_norm": 5.7326802198899705, "learning_rate": 7.157988998157659e-06, "loss": 17.0555, "step": 20637 }, { "epoch": 0.37724605626336666, "grad_norm": 7.460395265804034, "learning_rate": 7.157721972387539e-06, "loss": 17.85, "step": 20638 }, { "epoch": 0.3772643354598132, "grad_norm": 6.627173442440888, "learning_rate": 7.157454939054788e-06, "loss": 17.5353, "step": 20639 }, { "epoch": 0.37728261465625973, "grad_norm": 5.914285399030155, "learning_rate": 7.157187898160347e-06, "loss": 17.3655, "step": 20640 }, { "epoch": 0.3773008938527062, "grad_norm": 5.620032876235361, "learning_rate": 7.156920849705149e-06, "loss": 17.2645, "step": 20641 }, { "epoch": 0.37731917304915275, "grad_norm": 7.513128525274539, "learning_rate": 7.156653793690129e-06, "loss": 17.9416, "step": 20642 }, { "epoch": 0.3773374522455993, "grad_norm": 6.12097987736912, "learning_rate": 7.156386730116227e-06, "loss": 17.4376, "step": 20643 }, { "epoch": 0.3773557314420458, "grad_norm": 6.206280891072118, "learning_rate": 7.156119658984375e-06, "loss": 17.4708, "step": 20644 }, { "epoch": 0.37737401063849235, "grad_norm": 6.309603134178015, "learning_rate": 7.155852580295513e-06, "loss": 17.4303, "step": 20645 }, { "epoch": 0.37739228983493883, "grad_norm": 6.091201990564179, "learning_rate": 7.155585494050571e-06, "loss": 17.1456, "step": 20646 }, { "epoch": 0.37741056903138537, "grad_norm": 6.393127162720256, "learning_rate": 7.15531840025049e-06, "loss": 17.6871, "step": 20647 }, { "epoch": 0.3774288482278319, "grad_norm": 5.684010274567867, "learning_rate": 7.155051298896207e-06, "loss": 17.1029, "step": 20648 }, { "epoch": 0.37744712742427844, "grad_norm": 5.859678024083746, "learning_rate": 7.154784189988655e-06, "loss": 17.3521, "step": 20649 }, { "epoch": 0.377465406620725, "grad_norm": 6.493662822507453, "learning_rate": 7.154517073528772e-06, "loss": 17.4507, "step": 20650 }, { "epoch": 0.37748368581717145, "grad_norm": 7.461636475787436, "learning_rate": 7.154249949517493e-06, "loss": 18.1048, "step": 20651 }, { "epoch": 0.377501965013618, "grad_norm": 8.337347370485924, "learning_rate": 7.153982817955755e-06, "loss": 18.4426, "step": 20652 }, { "epoch": 0.3775202442100645, "grad_norm": 7.217298307508891, "learning_rate": 7.153715678844494e-06, "loss": 17.9003, "step": 20653 }, { "epoch": 0.37753852340651106, "grad_norm": 6.72731521926569, "learning_rate": 7.153448532184646e-06, "loss": 17.7426, "step": 20654 }, { "epoch": 0.3775568026029576, "grad_norm": 7.578537547101339, "learning_rate": 7.153181377977148e-06, "loss": 17.8402, "step": 20655 }, { "epoch": 0.3775750817994041, "grad_norm": 6.645035765757099, "learning_rate": 7.152914216222937e-06, "loss": 17.6881, "step": 20656 }, { "epoch": 0.3775933609958506, "grad_norm": 7.351151228534585, "learning_rate": 7.152647046922947e-06, "loss": 18.1416, "step": 20657 }, { "epoch": 0.37761164019229715, "grad_norm": 6.366196253922736, "learning_rate": 7.152379870078119e-06, "loss": 17.3985, "step": 20658 }, { "epoch": 0.3776299193887437, "grad_norm": 7.008910741742748, "learning_rate": 7.152112685689383e-06, "loss": 17.9082, "step": 20659 }, { "epoch": 0.3776481985851902, "grad_norm": 6.443878939319213, "learning_rate": 7.151845493757679e-06, "loss": 17.4953, "step": 20660 }, { "epoch": 0.3776664777816367, "grad_norm": 8.356011214369085, "learning_rate": 7.151578294283944e-06, "loss": 18.0463, "step": 20661 }, { "epoch": 0.37768475697808324, "grad_norm": 5.906777639110561, "learning_rate": 7.151311087269115e-06, "loss": 17.2742, "step": 20662 }, { "epoch": 0.37770303617452977, "grad_norm": 7.20076130382401, "learning_rate": 7.151043872714126e-06, "loss": 17.7422, "step": 20663 }, { "epoch": 0.3777213153709763, "grad_norm": 6.3545204646164954, "learning_rate": 7.150776650619915e-06, "loss": 17.4427, "step": 20664 }, { "epoch": 0.37773959456742284, "grad_norm": 6.162254158058019, "learning_rate": 7.15050942098742e-06, "loss": 17.161, "step": 20665 }, { "epoch": 0.3777578737638693, "grad_norm": 7.150157393780649, "learning_rate": 7.150242183817574e-06, "loss": 18.031, "step": 20666 }, { "epoch": 0.37777615296031586, "grad_norm": 6.03951850120098, "learning_rate": 7.149974939111316e-06, "loss": 17.2216, "step": 20667 }, { "epoch": 0.3777944321567624, "grad_norm": 8.161372667305617, "learning_rate": 7.149707686869582e-06, "loss": 18.3367, "step": 20668 }, { "epoch": 0.37781271135320893, "grad_norm": 6.55974120489108, "learning_rate": 7.149440427093311e-06, "loss": 17.6477, "step": 20669 }, { "epoch": 0.3778309905496554, "grad_norm": 5.8649574848977215, "learning_rate": 7.149173159783437e-06, "loss": 17.6777, "step": 20670 }, { "epoch": 0.37784926974610195, "grad_norm": 7.815085048357885, "learning_rate": 7.148905884940898e-06, "loss": 17.8094, "step": 20671 }, { "epoch": 0.3778675489425485, "grad_norm": 5.209538297226656, "learning_rate": 7.14863860256663e-06, "loss": 17.1547, "step": 20672 }, { "epoch": 0.377885828138995, "grad_norm": 6.423224718314327, "learning_rate": 7.1483713126615685e-06, "loss": 17.8088, "step": 20673 }, { "epoch": 0.37790410733544155, "grad_norm": 5.678146266476477, "learning_rate": 7.148104015226653e-06, "loss": 17.3624, "step": 20674 }, { "epoch": 0.37792238653188803, "grad_norm": 6.250913610206943, "learning_rate": 7.14783671026282e-06, "loss": 17.5371, "step": 20675 }, { "epoch": 0.37794066572833457, "grad_norm": 7.075915479019987, "learning_rate": 7.147569397771006e-06, "loss": 17.9878, "step": 20676 }, { "epoch": 0.3779589449247811, "grad_norm": 5.2718456219478655, "learning_rate": 7.1473020777521474e-06, "loss": 16.95, "step": 20677 }, { "epoch": 0.37797722412122764, "grad_norm": 6.692447361100275, "learning_rate": 7.1470347502071804e-06, "loss": 17.5211, "step": 20678 }, { "epoch": 0.3779955033176742, "grad_norm": 5.869065536809518, "learning_rate": 7.146767415137044e-06, "loss": 17.4061, "step": 20679 }, { "epoch": 0.37801378251412066, "grad_norm": 5.385633023882873, "learning_rate": 7.146500072542672e-06, "loss": 17.283, "step": 20680 }, { "epoch": 0.3780320617105672, "grad_norm": 6.259513688434857, "learning_rate": 7.146232722425006e-06, "loss": 17.4676, "step": 20681 }, { "epoch": 0.3780503409070137, "grad_norm": 7.565017404182663, "learning_rate": 7.145965364784979e-06, "loss": 17.7473, "step": 20682 }, { "epoch": 0.37806862010346026, "grad_norm": 6.2425939611643475, "learning_rate": 7.145697999623531e-06, "loss": 17.3411, "step": 20683 }, { "epoch": 0.3780868992999068, "grad_norm": 6.515403647706122, "learning_rate": 7.145430626941596e-06, "loss": 17.5335, "step": 20684 }, { "epoch": 0.3781051784963533, "grad_norm": 5.913250439289875, "learning_rate": 7.145163246740114e-06, "loss": 17.2398, "step": 20685 }, { "epoch": 0.3781234576927998, "grad_norm": 6.178161015518975, "learning_rate": 7.144895859020022e-06, "loss": 17.678, "step": 20686 }, { "epoch": 0.37814173688924635, "grad_norm": 6.245587199046832, "learning_rate": 7.144628463782254e-06, "loss": 17.3644, "step": 20687 }, { "epoch": 0.3781600160856929, "grad_norm": 9.519394063770232, "learning_rate": 7.14436106102775e-06, "loss": 18.4064, "step": 20688 }, { "epoch": 0.3781782952821394, "grad_norm": 8.162139299355768, "learning_rate": 7.144093650757448e-06, "loss": 18.1407, "step": 20689 }, { "epoch": 0.3781965744785859, "grad_norm": 8.207625117832002, "learning_rate": 7.1438262329722816e-06, "loss": 17.5477, "step": 20690 }, { "epoch": 0.37821485367503244, "grad_norm": 6.423680900033887, "learning_rate": 7.143558807673191e-06, "loss": 17.316, "step": 20691 }, { "epoch": 0.37823313287147897, "grad_norm": 6.368076140486672, "learning_rate": 7.143291374861113e-06, "loss": 17.5855, "step": 20692 }, { "epoch": 0.3782514120679255, "grad_norm": 6.50904391012658, "learning_rate": 7.143023934536986e-06, "loss": 17.2793, "step": 20693 }, { "epoch": 0.37826969126437204, "grad_norm": 7.412766496640174, "learning_rate": 7.142756486701744e-06, "loss": 17.9571, "step": 20694 }, { "epoch": 0.3782879704608185, "grad_norm": 6.464074321628694, "learning_rate": 7.142489031356328e-06, "loss": 17.6333, "step": 20695 }, { "epoch": 0.37830624965726506, "grad_norm": 6.848667173751901, "learning_rate": 7.1422215685016725e-06, "loss": 17.7922, "step": 20696 }, { "epoch": 0.3783245288537116, "grad_norm": 6.335291254994485, "learning_rate": 7.141954098138717e-06, "loss": 17.5243, "step": 20697 }, { "epoch": 0.37834280805015813, "grad_norm": 5.87701609881068, "learning_rate": 7.1416866202683975e-06, "loss": 17.2739, "step": 20698 }, { "epoch": 0.37836108724660467, "grad_norm": 6.276542507763416, "learning_rate": 7.141419134891654e-06, "loss": 17.4585, "step": 20699 }, { "epoch": 0.37837936644305115, "grad_norm": 5.95125091657514, "learning_rate": 7.141151642009421e-06, "loss": 17.2629, "step": 20700 }, { "epoch": 0.3783976456394977, "grad_norm": 6.099762326585936, "learning_rate": 7.1408841416226375e-06, "loss": 17.2621, "step": 20701 }, { "epoch": 0.3784159248359442, "grad_norm": 6.472944595990493, "learning_rate": 7.14061663373224e-06, "loss": 17.6128, "step": 20702 }, { "epoch": 0.37843420403239075, "grad_norm": 7.422113554229637, "learning_rate": 7.1403491183391695e-06, "loss": 17.966, "step": 20703 }, { "epoch": 0.37845248322883723, "grad_norm": 6.347877429952918, "learning_rate": 7.140081595444359e-06, "loss": 17.2928, "step": 20704 }, { "epoch": 0.37847076242528377, "grad_norm": 6.016001465431868, "learning_rate": 7.139814065048748e-06, "loss": 17.2871, "step": 20705 }, { "epoch": 0.3784890416217303, "grad_norm": 6.682169672155171, "learning_rate": 7.139546527153275e-06, "loss": 17.435, "step": 20706 }, { "epoch": 0.37850732081817684, "grad_norm": 6.188439116903879, "learning_rate": 7.139278981758878e-06, "loss": 17.6462, "step": 20707 }, { "epoch": 0.3785256000146234, "grad_norm": 6.600287770503797, "learning_rate": 7.139011428866493e-06, "loss": 17.3779, "step": 20708 }, { "epoch": 0.37854387921106986, "grad_norm": 5.474955385579385, "learning_rate": 7.1387438684770585e-06, "loss": 16.9861, "step": 20709 }, { "epoch": 0.3785621584075164, "grad_norm": 5.5985919635142745, "learning_rate": 7.138476300591511e-06, "loss": 17.2531, "step": 20710 }, { "epoch": 0.3785804376039629, "grad_norm": 5.726156526690359, "learning_rate": 7.138208725210791e-06, "loss": 17.3002, "step": 20711 }, { "epoch": 0.37859871680040946, "grad_norm": 6.581332163686492, "learning_rate": 7.137941142335836e-06, "loss": 17.4097, "step": 20712 }, { "epoch": 0.378616995996856, "grad_norm": 7.045607385744825, "learning_rate": 7.137673551967581e-06, "loss": 17.8435, "step": 20713 }, { "epoch": 0.3786352751933025, "grad_norm": 6.638400931802244, "learning_rate": 7.1374059541069665e-06, "loss": 17.755, "step": 20714 }, { "epoch": 0.378653554389749, "grad_norm": 7.596801101096576, "learning_rate": 7.1371383487549296e-06, "loss": 17.7562, "step": 20715 }, { "epoch": 0.37867183358619555, "grad_norm": 9.420613093821757, "learning_rate": 7.136870735912408e-06, "loss": 18.0275, "step": 20716 }, { "epoch": 0.3786901127826421, "grad_norm": 6.237802438989488, "learning_rate": 7.13660311558034e-06, "loss": 17.5701, "step": 20717 }, { "epoch": 0.3787083919790886, "grad_norm": 8.624445529734015, "learning_rate": 7.136335487759664e-06, "loss": 18.3155, "step": 20718 }, { "epoch": 0.3787266711755351, "grad_norm": 7.878952553669444, "learning_rate": 7.136067852451316e-06, "loss": 18.0405, "step": 20719 }, { "epoch": 0.37874495037198164, "grad_norm": 4.998626687964603, "learning_rate": 7.135800209656238e-06, "loss": 16.9398, "step": 20720 }, { "epoch": 0.3787632295684282, "grad_norm": 6.383723286774549, "learning_rate": 7.135532559375364e-06, "loss": 17.6559, "step": 20721 }, { "epoch": 0.3787815087648747, "grad_norm": 8.815173822229411, "learning_rate": 7.135264901609633e-06, "loss": 18.904, "step": 20722 }, { "epoch": 0.37879978796132124, "grad_norm": 7.22992635093191, "learning_rate": 7.134997236359984e-06, "loss": 17.6525, "step": 20723 }, { "epoch": 0.3788180671577677, "grad_norm": 5.9283624355310165, "learning_rate": 7.134729563627356e-06, "loss": 17.2961, "step": 20724 }, { "epoch": 0.37883634635421426, "grad_norm": 7.719535222571975, "learning_rate": 7.134461883412686e-06, "loss": 17.9128, "step": 20725 }, { "epoch": 0.3788546255506608, "grad_norm": 7.733402061159149, "learning_rate": 7.134194195716912e-06, "loss": 17.5171, "step": 20726 }, { "epoch": 0.37887290474710733, "grad_norm": 6.445478746516123, "learning_rate": 7.133926500540973e-06, "loss": 17.4731, "step": 20727 }, { "epoch": 0.37889118394355387, "grad_norm": 6.5167505852322565, "learning_rate": 7.133658797885806e-06, "loss": 17.68, "step": 20728 }, { "epoch": 0.37890946314000035, "grad_norm": 5.421027084675409, "learning_rate": 7.1333910877523505e-06, "loss": 17.1143, "step": 20729 }, { "epoch": 0.3789277423364469, "grad_norm": 6.159280335343325, "learning_rate": 7.1331233701415445e-06, "loss": 17.4021, "step": 20730 }, { "epoch": 0.3789460215328934, "grad_norm": 6.331667450781991, "learning_rate": 7.132855645054326e-06, "loss": 17.3254, "step": 20731 }, { "epoch": 0.37896430072933995, "grad_norm": 5.962973632759887, "learning_rate": 7.1325879124916335e-06, "loss": 17.3799, "step": 20732 }, { "epoch": 0.3789825799257865, "grad_norm": 9.026509699935183, "learning_rate": 7.1323201724544054e-06, "loss": 17.743, "step": 20733 }, { "epoch": 0.37900085912223297, "grad_norm": 7.870083216701158, "learning_rate": 7.13205242494358e-06, "loss": 17.9164, "step": 20734 }, { "epoch": 0.3790191383186795, "grad_norm": 6.315996277779612, "learning_rate": 7.131784669960097e-06, "loss": 17.4389, "step": 20735 }, { "epoch": 0.37903741751512604, "grad_norm": 6.230132327519647, "learning_rate": 7.1315169075048925e-06, "loss": 17.5464, "step": 20736 }, { "epoch": 0.3790556967115726, "grad_norm": 6.512243512904325, "learning_rate": 7.131249137578906e-06, "loss": 17.4722, "step": 20737 }, { "epoch": 0.37907397590801906, "grad_norm": 6.23420620772002, "learning_rate": 7.130981360183078e-06, "loss": 17.5325, "step": 20738 }, { "epoch": 0.3790922551044656, "grad_norm": 6.393638833110737, "learning_rate": 7.130713575318343e-06, "loss": 17.6389, "step": 20739 }, { "epoch": 0.3791105343009121, "grad_norm": 6.95356601842442, "learning_rate": 7.130445782985643e-06, "loss": 17.6849, "step": 20740 }, { "epoch": 0.37912881349735866, "grad_norm": 7.321391171174789, "learning_rate": 7.1301779831859155e-06, "loss": 17.7158, "step": 20741 }, { "epoch": 0.3791470926938052, "grad_norm": 6.270581262878128, "learning_rate": 7.129910175920098e-06, "loss": 17.5712, "step": 20742 }, { "epoch": 0.3791653718902517, "grad_norm": 5.7544699683778076, "learning_rate": 7.129642361189131e-06, "loss": 17.1531, "step": 20743 }, { "epoch": 0.3791836510866982, "grad_norm": 5.94292761915387, "learning_rate": 7.129374538993952e-06, "loss": 17.3732, "step": 20744 }, { "epoch": 0.37920193028314475, "grad_norm": 6.793645401089376, "learning_rate": 7.129106709335502e-06, "loss": 17.7743, "step": 20745 }, { "epoch": 0.3792202094795913, "grad_norm": 5.669400652861999, "learning_rate": 7.128838872214714e-06, "loss": 17.4094, "step": 20746 }, { "epoch": 0.3792384886760378, "grad_norm": 6.0989341256543135, "learning_rate": 7.128571027632533e-06, "loss": 17.5634, "step": 20747 }, { "epoch": 0.3792567678724843, "grad_norm": 6.06441229343946, "learning_rate": 7.128303175589894e-06, "loss": 17.439, "step": 20748 }, { "epoch": 0.37927504706893084, "grad_norm": 6.35104589498538, "learning_rate": 7.128035316087738e-06, "loss": 17.6118, "step": 20749 }, { "epoch": 0.3792933262653774, "grad_norm": 7.39298736376242, "learning_rate": 7.127767449127003e-06, "loss": 17.9437, "step": 20750 }, { "epoch": 0.3793116054618239, "grad_norm": 9.08367253886659, "learning_rate": 7.127499574708626e-06, "loss": 18.5506, "step": 20751 }, { "epoch": 0.37932988465827044, "grad_norm": 6.919256850079278, "learning_rate": 7.12723169283355e-06, "loss": 17.8666, "step": 20752 }, { "epoch": 0.3793481638547169, "grad_norm": 7.263917032680766, "learning_rate": 7.126963803502711e-06, "loss": 17.9848, "step": 20753 }, { "epoch": 0.37936644305116346, "grad_norm": 6.694937443856486, "learning_rate": 7.126695906717047e-06, "loss": 17.6934, "step": 20754 }, { "epoch": 0.37938472224761, "grad_norm": 7.969832870115532, "learning_rate": 7.1264280024775e-06, "loss": 18.1185, "step": 20755 }, { "epoch": 0.37940300144405653, "grad_norm": 6.4036621065899, "learning_rate": 7.126160090785006e-06, "loss": 17.5468, "step": 20756 }, { "epoch": 0.37942128064050307, "grad_norm": 5.827606480174566, "learning_rate": 7.125892171640506e-06, "loss": 17.217, "step": 20757 }, { "epoch": 0.37943955983694955, "grad_norm": 5.506119830132652, "learning_rate": 7.12562424504494e-06, "loss": 17.0894, "step": 20758 }, { "epoch": 0.3794578390333961, "grad_norm": 7.0339410972391745, "learning_rate": 7.125356310999243e-06, "loss": 17.8281, "step": 20759 }, { "epoch": 0.3794761182298426, "grad_norm": 6.1191833339710815, "learning_rate": 7.125088369504357e-06, "loss": 17.2776, "step": 20760 }, { "epoch": 0.37949439742628915, "grad_norm": 6.140600884159622, "learning_rate": 7.124820420561221e-06, "loss": 17.3743, "step": 20761 }, { "epoch": 0.3795126766227357, "grad_norm": 7.652709346580124, "learning_rate": 7.124552464170775e-06, "loss": 18.0969, "step": 20762 }, { "epoch": 0.37953095581918217, "grad_norm": 5.975453658052, "learning_rate": 7.124284500333955e-06, "loss": 17.5479, "step": 20763 }, { "epoch": 0.3795492350156287, "grad_norm": 5.893502341608432, "learning_rate": 7.124016529051703e-06, "loss": 17.3345, "step": 20764 }, { "epoch": 0.37956751421207524, "grad_norm": 6.661394297806554, "learning_rate": 7.1237485503249585e-06, "loss": 17.5253, "step": 20765 }, { "epoch": 0.3795857934085218, "grad_norm": 5.5381015433825596, "learning_rate": 7.123480564154659e-06, "loss": 17.033, "step": 20766 }, { "epoch": 0.3796040726049683, "grad_norm": 6.238925559423806, "learning_rate": 7.123212570541743e-06, "loss": 17.5199, "step": 20767 }, { "epoch": 0.3796223518014148, "grad_norm": 6.2780918759583795, "learning_rate": 7.122944569487153e-06, "loss": 17.4589, "step": 20768 }, { "epoch": 0.37964063099786133, "grad_norm": 5.659419884527585, "learning_rate": 7.122676560991826e-06, "loss": 17.1041, "step": 20769 }, { "epoch": 0.37965891019430786, "grad_norm": 7.404834297493874, "learning_rate": 7.1224085450567e-06, "loss": 17.856, "step": 20770 }, { "epoch": 0.3796771893907544, "grad_norm": 6.210689993677177, "learning_rate": 7.122140521682719e-06, "loss": 17.309, "step": 20771 }, { "epoch": 0.3796954685872009, "grad_norm": 4.809070659389534, "learning_rate": 7.121872490870818e-06, "loss": 16.861, "step": 20772 }, { "epoch": 0.3797137477836474, "grad_norm": 7.589711016124053, "learning_rate": 7.1216044526219375e-06, "loss": 17.6736, "step": 20773 }, { "epoch": 0.37973202698009395, "grad_norm": 6.303364912652323, "learning_rate": 7.121336406937018e-06, "loss": 17.6306, "step": 20774 }, { "epoch": 0.3797503061765405, "grad_norm": 7.443107831532645, "learning_rate": 7.121068353817e-06, "loss": 18.2774, "step": 20775 }, { "epoch": 0.379768585372987, "grad_norm": 6.809614136229891, "learning_rate": 7.120800293262821e-06, "loss": 17.7834, "step": 20776 }, { "epoch": 0.3797868645694335, "grad_norm": 5.856678594502468, "learning_rate": 7.1205322252754206e-06, "loss": 17.2173, "step": 20777 }, { "epoch": 0.37980514376588004, "grad_norm": 7.222293962955577, "learning_rate": 7.120264149855738e-06, "loss": 17.7268, "step": 20778 }, { "epoch": 0.3798234229623266, "grad_norm": 5.724796100158445, "learning_rate": 7.119996067004714e-06, "loss": 17.3644, "step": 20779 }, { "epoch": 0.3798417021587731, "grad_norm": 5.224426821291531, "learning_rate": 7.119727976723289e-06, "loss": 17.0199, "step": 20780 }, { "epoch": 0.37985998135521964, "grad_norm": 6.378530739003206, "learning_rate": 7.119459879012399e-06, "loss": 17.5581, "step": 20781 }, { "epoch": 0.3798782605516661, "grad_norm": 7.493453941456023, "learning_rate": 7.119191773872988e-06, "loss": 17.8533, "step": 20782 }, { "epoch": 0.37989653974811266, "grad_norm": 7.181188667677754, "learning_rate": 7.118923661305992e-06, "loss": 17.936, "step": 20783 }, { "epoch": 0.3799148189445592, "grad_norm": 6.825187861916647, "learning_rate": 7.118655541312354e-06, "loss": 17.8693, "step": 20784 }, { "epoch": 0.37993309814100573, "grad_norm": 6.078822640548004, "learning_rate": 7.118387413893011e-06, "loss": 17.4699, "step": 20785 }, { "epoch": 0.37995137733745227, "grad_norm": 6.949376025687311, "learning_rate": 7.118119279048905e-06, "loss": 17.6847, "step": 20786 }, { "epoch": 0.37996965653389875, "grad_norm": 6.2332728126778605, "learning_rate": 7.117851136780974e-06, "loss": 17.6239, "step": 20787 }, { "epoch": 0.3799879357303453, "grad_norm": 10.07927838175929, "learning_rate": 7.1175829870901595e-06, "loss": 18.2738, "step": 20788 }, { "epoch": 0.3800062149267918, "grad_norm": 6.965522917367423, "learning_rate": 7.1173148299774e-06, "loss": 17.8364, "step": 20789 }, { "epoch": 0.38002449412323835, "grad_norm": 6.24263074851901, "learning_rate": 7.117046665443635e-06, "loss": 17.5051, "step": 20790 }, { "epoch": 0.3800427733196849, "grad_norm": 6.529845780698912, "learning_rate": 7.1167784934898044e-06, "loss": 17.8506, "step": 20791 }, { "epoch": 0.38006105251613137, "grad_norm": 6.713118413498703, "learning_rate": 7.116510314116851e-06, "loss": 17.42, "step": 20792 }, { "epoch": 0.3800793317125779, "grad_norm": 5.7589875249438425, "learning_rate": 7.116242127325712e-06, "loss": 17.3183, "step": 20793 }, { "epoch": 0.38009761090902444, "grad_norm": 5.604574003598077, "learning_rate": 7.115973933117328e-06, "loss": 17.4576, "step": 20794 }, { "epoch": 0.380115890105471, "grad_norm": 5.863492297018321, "learning_rate": 7.115705731492638e-06, "loss": 17.2632, "step": 20795 }, { "epoch": 0.3801341693019175, "grad_norm": 7.712852658319658, "learning_rate": 7.115437522452584e-06, "loss": 17.8007, "step": 20796 }, { "epoch": 0.380152448498364, "grad_norm": 5.439655829759279, "learning_rate": 7.115169305998104e-06, "loss": 16.9575, "step": 20797 }, { "epoch": 0.38017072769481053, "grad_norm": 6.5774090196801, "learning_rate": 7.1149010821301404e-06, "loss": 17.4319, "step": 20798 }, { "epoch": 0.38018900689125706, "grad_norm": 6.539854924193836, "learning_rate": 7.1146328508496325e-06, "loss": 17.3212, "step": 20799 }, { "epoch": 0.3802072860877036, "grad_norm": 6.136157464951033, "learning_rate": 7.1143646121575185e-06, "loss": 17.4212, "step": 20800 }, { "epoch": 0.38022556528415014, "grad_norm": 5.321499503310928, "learning_rate": 7.1140963660547394e-06, "loss": 17.2692, "step": 20801 }, { "epoch": 0.3802438444805966, "grad_norm": 5.402814100503582, "learning_rate": 7.113828112542237e-06, "loss": 17.0028, "step": 20802 }, { "epoch": 0.38026212367704315, "grad_norm": 7.2308965458430885, "learning_rate": 7.1135598516209515e-06, "loss": 17.8786, "step": 20803 }, { "epoch": 0.3802804028734897, "grad_norm": 9.61500681048284, "learning_rate": 7.113291583291821e-06, "loss": 18.4204, "step": 20804 }, { "epoch": 0.3802986820699362, "grad_norm": 6.106425624782197, "learning_rate": 7.113023307555786e-06, "loss": 17.2703, "step": 20805 }, { "epoch": 0.3803169612663827, "grad_norm": 6.99745351777501, "learning_rate": 7.1127550244137885e-06, "loss": 17.6546, "step": 20806 }, { "epoch": 0.38033524046282924, "grad_norm": 6.538338269757374, "learning_rate": 7.112486733866769e-06, "loss": 17.8167, "step": 20807 }, { "epoch": 0.3803535196592758, "grad_norm": 7.364576068447406, "learning_rate": 7.112218435915667e-06, "loss": 17.8228, "step": 20808 }, { "epoch": 0.3803717988557223, "grad_norm": 6.757236656425216, "learning_rate": 7.111950130561421e-06, "loss": 17.7835, "step": 20809 }, { "epoch": 0.38039007805216885, "grad_norm": 5.933350921832651, "learning_rate": 7.111681817804974e-06, "loss": 17.1217, "step": 20810 }, { "epoch": 0.3804083572486153, "grad_norm": 6.232631490358939, "learning_rate": 7.111413497647265e-06, "loss": 17.4871, "step": 20811 }, { "epoch": 0.38042663644506186, "grad_norm": 6.275613415622698, "learning_rate": 7.1111451700892355e-06, "loss": 17.231, "step": 20812 }, { "epoch": 0.3804449156415084, "grad_norm": 6.23796317998585, "learning_rate": 7.110876835131824e-06, "loss": 17.502, "step": 20813 }, { "epoch": 0.38046319483795493, "grad_norm": 6.2602286933265905, "learning_rate": 7.110608492775974e-06, "loss": 17.3774, "step": 20814 }, { "epoch": 0.38048147403440147, "grad_norm": 6.931666805961042, "learning_rate": 7.1103401430226225e-06, "loss": 17.7166, "step": 20815 }, { "epoch": 0.38049975323084795, "grad_norm": 6.035905670956372, "learning_rate": 7.1100717858727145e-06, "loss": 17.2214, "step": 20816 }, { "epoch": 0.3805180324272945, "grad_norm": 5.7084629547077235, "learning_rate": 7.109803421327187e-06, "loss": 17.2142, "step": 20817 }, { "epoch": 0.380536311623741, "grad_norm": 8.142998894952107, "learning_rate": 7.1095350493869795e-06, "loss": 18.3317, "step": 20818 }, { "epoch": 0.38055459082018755, "grad_norm": 6.762787633424514, "learning_rate": 7.109266670053036e-06, "loss": 17.8856, "step": 20819 }, { "epoch": 0.3805728700166341, "grad_norm": 5.078895769171451, "learning_rate": 7.108998283326298e-06, "loss": 17.0001, "step": 20820 }, { "epoch": 0.38059114921308057, "grad_norm": 6.911115253979119, "learning_rate": 7.1087298892077015e-06, "loss": 17.8457, "step": 20821 }, { "epoch": 0.3806094284095271, "grad_norm": 6.12260812313701, "learning_rate": 7.108461487698192e-06, "loss": 17.577, "step": 20822 }, { "epoch": 0.38062770760597364, "grad_norm": 6.335144243529556, "learning_rate": 7.108193078798704e-06, "loss": 17.1269, "step": 20823 }, { "epoch": 0.3806459868024202, "grad_norm": 6.537416389310044, "learning_rate": 7.107924662510186e-06, "loss": 17.5166, "step": 20824 }, { "epoch": 0.3806642659988667, "grad_norm": 7.255772304525292, "learning_rate": 7.107656238833573e-06, "loss": 17.5818, "step": 20825 }, { "epoch": 0.3806825451953132, "grad_norm": 6.587597096182242, "learning_rate": 7.10738780776981e-06, "loss": 17.718, "step": 20826 }, { "epoch": 0.38070082439175973, "grad_norm": 6.450749813985822, "learning_rate": 7.107119369319834e-06, "loss": 17.4572, "step": 20827 }, { "epoch": 0.38071910358820626, "grad_norm": 9.730322542962446, "learning_rate": 7.106850923484587e-06, "loss": 18.7782, "step": 20828 }, { "epoch": 0.3807373827846528, "grad_norm": 6.702658270530379, "learning_rate": 7.106582470265011e-06, "loss": 17.5062, "step": 20829 }, { "epoch": 0.38075566198109934, "grad_norm": 6.741417519344889, "learning_rate": 7.1063140096620455e-06, "loss": 17.8569, "step": 20830 }, { "epoch": 0.3807739411775458, "grad_norm": 6.835549574482659, "learning_rate": 7.106045541676634e-06, "loss": 17.7487, "step": 20831 }, { "epoch": 0.38079222037399235, "grad_norm": 5.835097889779242, "learning_rate": 7.105777066309714e-06, "loss": 17.3014, "step": 20832 }, { "epoch": 0.3808104995704389, "grad_norm": 8.640088946506342, "learning_rate": 7.105508583562227e-06, "loss": 18.1967, "step": 20833 }, { "epoch": 0.3808287787668854, "grad_norm": 6.4034437865262674, "learning_rate": 7.105240093435118e-06, "loss": 17.1212, "step": 20834 }, { "epoch": 0.38084705796333196, "grad_norm": 5.855337970033611, "learning_rate": 7.104971595929324e-06, "loss": 17.2914, "step": 20835 }, { "epoch": 0.38086533715977844, "grad_norm": 6.656582309527312, "learning_rate": 7.1047030910457874e-06, "loss": 17.8303, "step": 20836 }, { "epoch": 0.380883616356225, "grad_norm": 6.657019482079074, "learning_rate": 7.104434578785448e-06, "loss": 17.9462, "step": 20837 }, { "epoch": 0.3809018955526715, "grad_norm": 6.17664984070476, "learning_rate": 7.104166059149249e-06, "loss": 17.4421, "step": 20838 }, { "epoch": 0.38092017474911805, "grad_norm": 5.880778338538877, "learning_rate": 7.10389753213813e-06, "loss": 17.3208, "step": 20839 }, { "epoch": 0.3809384539455645, "grad_norm": 4.631100250135552, "learning_rate": 7.103628997753033e-06, "loss": 16.809, "step": 20840 }, { "epoch": 0.38095673314201106, "grad_norm": 7.10246331981957, "learning_rate": 7.1033604559948985e-06, "loss": 17.8819, "step": 20841 }, { "epoch": 0.3809750123384576, "grad_norm": 6.2676708374018135, "learning_rate": 7.103091906864669e-06, "loss": 17.5006, "step": 20842 }, { "epoch": 0.38099329153490413, "grad_norm": 6.668340103982635, "learning_rate": 7.102823350363283e-06, "loss": 17.5777, "step": 20843 }, { "epoch": 0.38101157073135067, "grad_norm": 6.365695726242811, "learning_rate": 7.102554786491687e-06, "loss": 17.5829, "step": 20844 }, { "epoch": 0.38102984992779715, "grad_norm": 6.769414572402143, "learning_rate": 7.102286215250815e-06, "loss": 17.6714, "step": 20845 }, { "epoch": 0.3810481291242437, "grad_norm": 6.226691924059086, "learning_rate": 7.102017636641615e-06, "loss": 17.8598, "step": 20846 }, { "epoch": 0.3810664083206902, "grad_norm": 5.4387356711989865, "learning_rate": 7.101749050665024e-06, "loss": 17.2436, "step": 20847 }, { "epoch": 0.38108468751713676, "grad_norm": 6.852695570490196, "learning_rate": 7.101480457321987e-06, "loss": 17.6438, "step": 20848 }, { "epoch": 0.3811029667135833, "grad_norm": 6.629804302980361, "learning_rate": 7.101211856613442e-06, "loss": 17.5632, "step": 20849 }, { "epoch": 0.38112124591002977, "grad_norm": 6.657351809764172, "learning_rate": 7.100943248540331e-06, "loss": 17.7071, "step": 20850 }, { "epoch": 0.3811395251064763, "grad_norm": 7.011585648378357, "learning_rate": 7.100674633103597e-06, "loss": 17.6329, "step": 20851 }, { "epoch": 0.38115780430292284, "grad_norm": 7.160553696981467, "learning_rate": 7.100406010304182e-06, "loss": 17.6871, "step": 20852 }, { "epoch": 0.3811760834993694, "grad_norm": 7.141567162756561, "learning_rate": 7.100137380143025e-06, "loss": 18.0261, "step": 20853 }, { "epoch": 0.3811943626958159, "grad_norm": 6.259878955116255, "learning_rate": 7.099868742621069e-06, "loss": 17.3837, "step": 20854 }, { "epoch": 0.3812126418922624, "grad_norm": 5.492887614629066, "learning_rate": 7.099600097739254e-06, "loss": 17.3764, "step": 20855 }, { "epoch": 0.38123092108870893, "grad_norm": 7.144588748872126, "learning_rate": 7.099331445498524e-06, "loss": 17.9633, "step": 20856 }, { "epoch": 0.38124920028515547, "grad_norm": 7.159360110593641, "learning_rate": 7.0990627858998195e-06, "loss": 17.9482, "step": 20857 }, { "epoch": 0.381267479481602, "grad_norm": 7.675817932055794, "learning_rate": 7.098794118944083e-06, "loss": 18.113, "step": 20858 }, { "epoch": 0.38128575867804854, "grad_norm": 5.633699805040702, "learning_rate": 7.098525444632253e-06, "loss": 17.3153, "step": 20859 }, { "epoch": 0.381304037874495, "grad_norm": 6.470043418287717, "learning_rate": 7.0982567629652744e-06, "loss": 17.4714, "step": 20860 }, { "epoch": 0.38132231707094155, "grad_norm": 6.768151848349427, "learning_rate": 7.0979880739440876e-06, "loss": 17.6138, "step": 20861 }, { "epoch": 0.3813405962673881, "grad_norm": 6.104392527320515, "learning_rate": 7.0977193775696366e-06, "loss": 17.4135, "step": 20862 }, { "epoch": 0.3813588754638346, "grad_norm": 5.745199755207529, "learning_rate": 7.097450673842859e-06, "loss": 17.2317, "step": 20863 }, { "epoch": 0.38137715466028116, "grad_norm": 6.5674161376538756, "learning_rate": 7.097181962764699e-06, "loss": 17.8981, "step": 20864 }, { "epoch": 0.38139543385672764, "grad_norm": 7.043972020853318, "learning_rate": 7.0969132443361e-06, "loss": 17.7503, "step": 20865 }, { "epoch": 0.3814137130531742, "grad_norm": 6.367059956679154, "learning_rate": 7.096644518558e-06, "loss": 17.639, "step": 20866 }, { "epoch": 0.3814319922496207, "grad_norm": 6.702691715465437, "learning_rate": 7.096375785431344e-06, "loss": 17.6277, "step": 20867 }, { "epoch": 0.38145027144606725, "grad_norm": 6.207568160627037, "learning_rate": 7.096107044957072e-06, "loss": 17.6149, "step": 20868 }, { "epoch": 0.3814685506425138, "grad_norm": 6.595650471552085, "learning_rate": 7.095838297136127e-06, "loss": 17.7398, "step": 20869 }, { "epoch": 0.38148682983896026, "grad_norm": 6.134431622447001, "learning_rate": 7.095569541969451e-06, "loss": 17.3198, "step": 20870 }, { "epoch": 0.3815051090354068, "grad_norm": 6.07535366359923, "learning_rate": 7.095300779457986e-06, "loss": 17.5319, "step": 20871 }, { "epoch": 0.38152338823185333, "grad_norm": 5.8705506077538585, "learning_rate": 7.0950320096026735e-06, "loss": 17.4445, "step": 20872 }, { "epoch": 0.38154166742829987, "grad_norm": 6.168112308933465, "learning_rate": 7.094763232404454e-06, "loss": 17.3161, "step": 20873 }, { "epoch": 0.38155994662474635, "grad_norm": 5.920086473077339, "learning_rate": 7.094494447864272e-06, "loss": 17.2226, "step": 20874 }, { "epoch": 0.3815782258211929, "grad_norm": 6.572426053992683, "learning_rate": 7.094225655983069e-06, "loss": 17.3873, "step": 20875 }, { "epoch": 0.3815965050176394, "grad_norm": 7.497605754510795, "learning_rate": 7.093956856761788e-06, "loss": 17.387, "step": 20876 }, { "epoch": 0.38161478421408596, "grad_norm": 7.672030592204335, "learning_rate": 7.0936880502013685e-06, "loss": 17.9081, "step": 20877 }, { "epoch": 0.3816330634105325, "grad_norm": 4.961913623652882, "learning_rate": 7.093419236302753e-06, "loss": 16.7968, "step": 20878 }, { "epoch": 0.38165134260697897, "grad_norm": 5.825915175452671, "learning_rate": 7.093150415066887e-06, "loss": 17.1826, "step": 20879 }, { "epoch": 0.3816696218034255, "grad_norm": 5.778189478281201, "learning_rate": 7.09288158649471e-06, "loss": 17.192, "step": 20880 }, { "epoch": 0.38168790099987204, "grad_norm": 6.757521273809351, "learning_rate": 7.092612750587164e-06, "loss": 17.7086, "step": 20881 }, { "epoch": 0.3817061801963186, "grad_norm": 7.279928892138955, "learning_rate": 7.092343907345191e-06, "loss": 18.1617, "step": 20882 }, { "epoch": 0.3817244593927651, "grad_norm": 6.173932016935703, "learning_rate": 7.092075056769735e-06, "loss": 17.9202, "step": 20883 }, { "epoch": 0.3817427385892116, "grad_norm": 6.879067491651368, "learning_rate": 7.0918061988617386e-06, "loss": 17.9197, "step": 20884 }, { "epoch": 0.38176101778565813, "grad_norm": 6.638190201616912, "learning_rate": 7.091537333622142e-06, "loss": 17.7333, "step": 20885 }, { "epoch": 0.38177929698210467, "grad_norm": 5.592561299929222, "learning_rate": 7.0912684610518876e-06, "loss": 17.1227, "step": 20886 }, { "epoch": 0.3817975761785512, "grad_norm": 6.838690555546561, "learning_rate": 7.090999581151919e-06, "loss": 17.5876, "step": 20887 }, { "epoch": 0.38181585537499774, "grad_norm": 7.727658466566001, "learning_rate": 7.09073069392318e-06, "loss": 17.8524, "step": 20888 }, { "epoch": 0.3818341345714442, "grad_norm": 7.9605758252771865, "learning_rate": 7.09046179936661e-06, "loss": 18.4245, "step": 20889 }, { "epoch": 0.38185241376789075, "grad_norm": 4.929522106376273, "learning_rate": 7.090192897483154e-06, "loss": 16.8717, "step": 20890 }, { "epoch": 0.3818706929643373, "grad_norm": 7.145315426100771, "learning_rate": 7.089923988273752e-06, "loss": 17.7391, "step": 20891 }, { "epoch": 0.3818889721607838, "grad_norm": 5.938780832852384, "learning_rate": 7.089655071739347e-06, "loss": 17.4467, "step": 20892 }, { "epoch": 0.38190725135723036, "grad_norm": 6.16574190594442, "learning_rate": 7.089386147880885e-06, "loss": 17.5801, "step": 20893 }, { "epoch": 0.38192553055367684, "grad_norm": 6.957619290412913, "learning_rate": 7.089117216699304e-06, "loss": 17.8385, "step": 20894 }, { "epoch": 0.3819438097501234, "grad_norm": 5.634910750663889, "learning_rate": 7.088848278195548e-06, "loss": 17.4732, "step": 20895 }, { "epoch": 0.3819620889465699, "grad_norm": 12.784091532845126, "learning_rate": 7.088579332370561e-06, "loss": 18.0073, "step": 20896 }, { "epoch": 0.38198036814301645, "grad_norm": 5.366099207301944, "learning_rate": 7.088310379225285e-06, "loss": 17.1493, "step": 20897 }, { "epoch": 0.381998647339463, "grad_norm": 7.263435265105196, "learning_rate": 7.088041418760662e-06, "loss": 17.5792, "step": 20898 }, { "epoch": 0.38201692653590946, "grad_norm": 5.88373008069427, "learning_rate": 7.087772450977634e-06, "loss": 17.4534, "step": 20899 }, { "epoch": 0.382035205732356, "grad_norm": 7.455630310783019, "learning_rate": 7.087503475877145e-06, "loss": 17.9119, "step": 20900 }, { "epoch": 0.38205348492880253, "grad_norm": 5.745101603164724, "learning_rate": 7.087234493460139e-06, "loss": 17.0636, "step": 20901 }, { "epoch": 0.38207176412524907, "grad_norm": 6.147402314200255, "learning_rate": 7.086965503727556e-06, "loss": 17.4028, "step": 20902 }, { "epoch": 0.3820900433216956, "grad_norm": 7.93139180122912, "learning_rate": 7.086696506680342e-06, "loss": 18.0001, "step": 20903 }, { "epoch": 0.3821083225181421, "grad_norm": 5.751434835443962, "learning_rate": 7.086427502319435e-06, "loss": 17.4825, "step": 20904 }, { "epoch": 0.3821266017145886, "grad_norm": 10.527463834833005, "learning_rate": 7.0861584906457805e-06, "loss": 18.3152, "step": 20905 }, { "epoch": 0.38214488091103516, "grad_norm": 6.088116082350334, "learning_rate": 7.085889471660323e-06, "loss": 17.3969, "step": 20906 }, { "epoch": 0.3821631601074817, "grad_norm": 5.788142043169116, "learning_rate": 7.085620445364005e-06, "loss": 17.5298, "step": 20907 }, { "epoch": 0.38218143930392817, "grad_norm": 7.462126406792685, "learning_rate": 7.085351411757766e-06, "loss": 18.252, "step": 20908 }, { "epoch": 0.3821997185003747, "grad_norm": 6.409991001895903, "learning_rate": 7.085082370842553e-06, "loss": 17.4893, "step": 20909 }, { "epoch": 0.38221799769682124, "grad_norm": 6.865954809362901, "learning_rate": 7.084813322619306e-06, "loss": 17.9921, "step": 20910 }, { "epoch": 0.3822362768932678, "grad_norm": 6.856510409482297, "learning_rate": 7.08454426708897e-06, "loss": 17.4194, "step": 20911 }, { "epoch": 0.3822545560897143, "grad_norm": 4.615319540814306, "learning_rate": 7.0842752042524865e-06, "loss": 16.9605, "step": 20912 }, { "epoch": 0.3822728352861608, "grad_norm": 5.526644847407925, "learning_rate": 7.084006134110799e-06, "loss": 17.1899, "step": 20913 }, { "epoch": 0.38229111448260733, "grad_norm": 8.99895679572845, "learning_rate": 7.0837370566648525e-06, "loss": 17.4682, "step": 20914 }, { "epoch": 0.38230939367905387, "grad_norm": 6.301254732162111, "learning_rate": 7.0834679719155876e-06, "loss": 17.1955, "step": 20915 }, { "epoch": 0.3823276728755004, "grad_norm": 7.629743795603142, "learning_rate": 7.083198879863947e-06, "loss": 17.5275, "step": 20916 }, { "epoch": 0.38234595207194694, "grad_norm": 6.202724535045338, "learning_rate": 7.082929780510877e-06, "loss": 17.3454, "step": 20917 }, { "epoch": 0.3823642312683934, "grad_norm": 6.982099796758858, "learning_rate": 7.0826606738573175e-06, "loss": 17.5226, "step": 20918 }, { "epoch": 0.38238251046483995, "grad_norm": 6.245180649736514, "learning_rate": 7.082391559904213e-06, "loss": 17.4011, "step": 20919 }, { "epoch": 0.3824007896612865, "grad_norm": 6.604345590638832, "learning_rate": 7.082122438652508e-06, "loss": 17.5027, "step": 20920 }, { "epoch": 0.382419068857733, "grad_norm": 5.691258786852814, "learning_rate": 7.081853310103145e-06, "loss": 17.1142, "step": 20921 }, { "epoch": 0.38243734805417956, "grad_norm": 6.314341060201153, "learning_rate": 7.081584174257066e-06, "loss": 17.4148, "step": 20922 }, { "epoch": 0.38245562725062604, "grad_norm": 6.277514237526559, "learning_rate": 7.081315031115214e-06, "loss": 17.5835, "step": 20923 }, { "epoch": 0.3824739064470726, "grad_norm": 5.968445440806513, "learning_rate": 7.081045880678534e-06, "loss": 17.3819, "step": 20924 }, { "epoch": 0.3824921856435191, "grad_norm": 5.913309017293552, "learning_rate": 7.08077672294797e-06, "loss": 17.2318, "step": 20925 }, { "epoch": 0.38251046483996565, "grad_norm": 7.212968971941925, "learning_rate": 7.080507557924463e-06, "loss": 17.874, "step": 20926 }, { "epoch": 0.3825287440364122, "grad_norm": 5.659362249790801, "learning_rate": 7.080238385608958e-06, "loss": 17.3396, "step": 20927 }, { "epoch": 0.38254702323285866, "grad_norm": 7.303117907501463, "learning_rate": 7.079969206002397e-06, "loss": 17.5264, "step": 20928 }, { "epoch": 0.3825653024293052, "grad_norm": 7.148331468273093, "learning_rate": 7.079700019105725e-06, "loss": 18.0166, "step": 20929 }, { "epoch": 0.38258358162575173, "grad_norm": 6.2101605807832945, "learning_rate": 7.079430824919885e-06, "loss": 17.5685, "step": 20930 }, { "epoch": 0.38260186082219827, "grad_norm": 6.126608292160563, "learning_rate": 7.0791616234458215e-06, "loss": 17.3098, "step": 20931 }, { "epoch": 0.3826201400186448, "grad_norm": 6.2424096754175515, "learning_rate": 7.078892414684475e-06, "loss": 17.2135, "step": 20932 }, { "epoch": 0.3826384192150913, "grad_norm": 7.893451554677613, "learning_rate": 7.078623198636792e-06, "loss": 17.8848, "step": 20933 }, { "epoch": 0.3826566984115378, "grad_norm": 6.27278983088745, "learning_rate": 7.078353975303716e-06, "loss": 17.4218, "step": 20934 }, { "epoch": 0.38267497760798436, "grad_norm": 7.5959714124403614, "learning_rate": 7.078084744686189e-06, "loss": 18.019, "step": 20935 }, { "epoch": 0.3826932568044309, "grad_norm": 6.751028770933, "learning_rate": 7.077815506785154e-06, "loss": 17.5426, "step": 20936 }, { "epoch": 0.38271153600087743, "grad_norm": 5.352088787704339, "learning_rate": 7.077546261601556e-06, "loss": 17.0635, "step": 20937 }, { "epoch": 0.3827298151973239, "grad_norm": 6.524017412712978, "learning_rate": 7.077277009136341e-06, "loss": 17.6072, "step": 20938 }, { "epoch": 0.38274809439377044, "grad_norm": 5.923411217835149, "learning_rate": 7.077007749390448e-06, "loss": 17.1601, "step": 20939 }, { "epoch": 0.382766373590217, "grad_norm": 6.382600647904093, "learning_rate": 7.076738482364825e-06, "loss": 17.797, "step": 20940 }, { "epoch": 0.3827846527866635, "grad_norm": 7.349333360894832, "learning_rate": 7.076469208060412e-06, "loss": 18.4183, "step": 20941 }, { "epoch": 0.38280293198311, "grad_norm": 7.886549704741756, "learning_rate": 7.076199926478155e-06, "loss": 17.9815, "step": 20942 }, { "epoch": 0.38282121117955653, "grad_norm": 6.920208388927554, "learning_rate": 7.075930637618998e-06, "loss": 17.764, "step": 20943 }, { "epoch": 0.38283949037600307, "grad_norm": 8.381040262324833, "learning_rate": 7.075661341483884e-06, "loss": 18.1572, "step": 20944 }, { "epoch": 0.3828577695724496, "grad_norm": 6.618339656691645, "learning_rate": 7.0753920380737564e-06, "loss": 17.5145, "step": 20945 }, { "epoch": 0.38287604876889614, "grad_norm": 6.021925121301376, "learning_rate": 7.075122727389561e-06, "loss": 17.5195, "step": 20946 }, { "epoch": 0.3828943279653426, "grad_norm": 7.302842556870371, "learning_rate": 7.07485340943224e-06, "loss": 17.9682, "step": 20947 }, { "epoch": 0.38291260716178915, "grad_norm": 6.476085190963245, "learning_rate": 7.074584084202739e-06, "loss": 17.7174, "step": 20948 }, { "epoch": 0.3829308863582357, "grad_norm": 7.060712497037543, "learning_rate": 7.074314751702e-06, "loss": 17.7902, "step": 20949 }, { "epoch": 0.3829491655546822, "grad_norm": 7.439936953729566, "learning_rate": 7.0740454119309655e-06, "loss": 17.8011, "step": 20950 }, { "epoch": 0.38296744475112876, "grad_norm": 5.748107972841419, "learning_rate": 7.073776064890584e-06, "loss": 17.2943, "step": 20951 }, { "epoch": 0.38298572394757524, "grad_norm": 7.592641996812257, "learning_rate": 7.073506710581798e-06, "loss": 18.1216, "step": 20952 }, { "epoch": 0.3830040031440218, "grad_norm": 5.639567552391562, "learning_rate": 7.073237349005551e-06, "loss": 17.2927, "step": 20953 }, { "epoch": 0.3830222823404683, "grad_norm": 6.2018678368437605, "learning_rate": 7.072967980162785e-06, "loss": 17.395, "step": 20954 }, { "epoch": 0.38304056153691485, "grad_norm": 5.873436213169519, "learning_rate": 7.072698604054448e-06, "loss": 17.2147, "step": 20955 }, { "epoch": 0.3830588407333614, "grad_norm": 6.504260340309082, "learning_rate": 7.07242922068148e-06, "loss": 17.9237, "step": 20956 }, { "epoch": 0.38307711992980786, "grad_norm": 6.911863760797188, "learning_rate": 7.072159830044829e-06, "loss": 17.8015, "step": 20957 }, { "epoch": 0.3830953991262544, "grad_norm": 6.0197496330229265, "learning_rate": 7.071890432145438e-06, "loss": 17.5761, "step": 20958 }, { "epoch": 0.38311367832270093, "grad_norm": 5.974001537225322, "learning_rate": 7.07162102698425e-06, "loss": 17.3282, "step": 20959 }, { "epoch": 0.38313195751914747, "grad_norm": 7.8089125046639625, "learning_rate": 7.071351614562211e-06, "loss": 18.4462, "step": 20960 }, { "epoch": 0.383150236715594, "grad_norm": 6.679126497544561, "learning_rate": 7.071082194880263e-06, "loss": 17.5977, "step": 20961 }, { "epoch": 0.3831685159120405, "grad_norm": 7.571452136221873, "learning_rate": 7.070812767939353e-06, "loss": 17.9026, "step": 20962 }, { "epoch": 0.383186795108487, "grad_norm": 5.488059508087825, "learning_rate": 7.0705433337404235e-06, "loss": 17.1057, "step": 20963 }, { "epoch": 0.38320507430493356, "grad_norm": 5.754784046064573, "learning_rate": 7.070273892284418e-06, "loss": 17.1884, "step": 20964 }, { "epoch": 0.3832233535013801, "grad_norm": 6.562704278530298, "learning_rate": 7.0700044435722845e-06, "loss": 17.3791, "step": 20965 }, { "epoch": 0.38324163269782663, "grad_norm": 6.10723289146391, "learning_rate": 7.069734987604964e-06, "loss": 17.1413, "step": 20966 }, { "epoch": 0.3832599118942731, "grad_norm": 9.181396417149502, "learning_rate": 7.069465524383401e-06, "loss": 17.8588, "step": 20967 }, { "epoch": 0.38327819109071964, "grad_norm": 5.816902113994662, "learning_rate": 7.069196053908541e-06, "loss": 17.29, "step": 20968 }, { "epoch": 0.3832964702871662, "grad_norm": 7.247253740811549, "learning_rate": 7.0689265761813295e-06, "loss": 17.9448, "step": 20969 }, { "epoch": 0.3833147494836127, "grad_norm": 5.846369869770649, "learning_rate": 7.0686570912027095e-06, "loss": 17.5586, "step": 20970 }, { "epoch": 0.38333302868005925, "grad_norm": 6.9528831305859, "learning_rate": 7.068387598973626e-06, "loss": 17.7971, "step": 20971 }, { "epoch": 0.38335130787650573, "grad_norm": 5.561074919618863, "learning_rate": 7.068118099495023e-06, "loss": 16.9145, "step": 20972 }, { "epoch": 0.38336958707295227, "grad_norm": 6.119739518076784, "learning_rate": 7.067848592767845e-06, "loss": 17.3776, "step": 20973 }, { "epoch": 0.3833878662693988, "grad_norm": 7.926793988714482, "learning_rate": 7.0675790787930384e-06, "loss": 18.1583, "step": 20974 }, { "epoch": 0.38340614546584534, "grad_norm": 7.415874693286228, "learning_rate": 7.067309557571546e-06, "loss": 17.8907, "step": 20975 }, { "epoch": 0.3834244246622918, "grad_norm": 5.336722379137145, "learning_rate": 7.067040029104314e-06, "loss": 16.8849, "step": 20976 }, { "epoch": 0.38344270385873835, "grad_norm": 6.731954946186777, "learning_rate": 7.066770493392284e-06, "loss": 17.6902, "step": 20977 }, { "epoch": 0.3834609830551849, "grad_norm": 6.256213226265865, "learning_rate": 7.066500950436404e-06, "loss": 17.5907, "step": 20978 }, { "epoch": 0.3834792622516314, "grad_norm": 6.945786522373672, "learning_rate": 7.066231400237619e-06, "loss": 17.4622, "step": 20979 }, { "epoch": 0.38349754144807796, "grad_norm": 5.882857721699971, "learning_rate": 7.06596184279687e-06, "loss": 17.3546, "step": 20980 }, { "epoch": 0.38351582064452444, "grad_norm": 5.737339459003288, "learning_rate": 7.065692278115105e-06, "loss": 17.4939, "step": 20981 }, { "epoch": 0.383534099840971, "grad_norm": 6.914059546544488, "learning_rate": 7.065422706193266e-06, "loss": 17.869, "step": 20982 }, { "epoch": 0.3835523790374175, "grad_norm": 7.15039442679996, "learning_rate": 7.065153127032303e-06, "loss": 17.7811, "step": 20983 }, { "epoch": 0.38357065823386405, "grad_norm": 7.5436196844894905, "learning_rate": 7.064883540633155e-06, "loss": 17.8848, "step": 20984 }, { "epoch": 0.3835889374303106, "grad_norm": 8.22823033911563, "learning_rate": 7.06461394699677e-06, "loss": 18.5603, "step": 20985 }, { "epoch": 0.38360721662675706, "grad_norm": 6.363131251137945, "learning_rate": 7.064344346124092e-06, "loss": 17.3607, "step": 20986 }, { "epoch": 0.3836254958232036, "grad_norm": 5.632763651606355, "learning_rate": 7.064074738016067e-06, "loss": 17.3302, "step": 20987 }, { "epoch": 0.38364377501965014, "grad_norm": 6.683524758785473, "learning_rate": 7.063805122673638e-06, "loss": 17.463, "step": 20988 }, { "epoch": 0.38366205421609667, "grad_norm": 5.601327133997698, "learning_rate": 7.0635355000977525e-06, "loss": 17.3258, "step": 20989 }, { "epoch": 0.3836803334125432, "grad_norm": 5.834606755959688, "learning_rate": 7.063265870289353e-06, "loss": 17.2437, "step": 20990 }, { "epoch": 0.3836986126089897, "grad_norm": 7.335505367654194, "learning_rate": 7.062996233249385e-06, "loss": 17.736, "step": 20991 }, { "epoch": 0.3837168918054362, "grad_norm": 7.605727190558551, "learning_rate": 7.062726588978795e-06, "loss": 17.6628, "step": 20992 }, { "epoch": 0.38373517100188276, "grad_norm": 6.677220834372723, "learning_rate": 7.062456937478529e-06, "loss": 17.5839, "step": 20993 }, { "epoch": 0.3837534501983293, "grad_norm": 5.9987708403078175, "learning_rate": 7.062187278749528e-06, "loss": 17.1523, "step": 20994 }, { "epoch": 0.38377172939477583, "grad_norm": 5.824464208607177, "learning_rate": 7.0619176127927416e-06, "loss": 17.3179, "step": 20995 }, { "epoch": 0.3837900085912223, "grad_norm": 6.46492619817191, "learning_rate": 7.0616479396091105e-06, "loss": 17.6594, "step": 20996 }, { "epoch": 0.38380828778766884, "grad_norm": 7.486001052481146, "learning_rate": 7.061378259199585e-06, "loss": 18.256, "step": 20997 }, { "epoch": 0.3838265669841154, "grad_norm": 6.76620453305171, "learning_rate": 7.061108571565105e-06, "loss": 17.6748, "step": 20998 }, { "epoch": 0.3838448461805619, "grad_norm": 6.107454163429668, "learning_rate": 7.06083887670662e-06, "loss": 17.2595, "step": 20999 }, { "epoch": 0.38386312537700845, "grad_norm": 6.861673884222232, "learning_rate": 7.060569174625074e-06, "loss": 17.6715, "step": 21000 }, { "epoch": 0.38388140457345493, "grad_norm": 7.388862690006889, "learning_rate": 7.060299465321409e-06, "loss": 17.999, "step": 21001 }, { "epoch": 0.38389968376990147, "grad_norm": 6.952039206444979, "learning_rate": 7.060029748796575e-06, "loss": 17.8205, "step": 21002 }, { "epoch": 0.383917962966348, "grad_norm": 8.046804842311497, "learning_rate": 7.059760025051517e-06, "loss": 17.9271, "step": 21003 }, { "epoch": 0.38393624216279454, "grad_norm": 6.753743435534955, "learning_rate": 7.059490294087178e-06, "loss": 17.7938, "step": 21004 }, { "epoch": 0.3839545213592411, "grad_norm": 7.518158013897047, "learning_rate": 7.059220555904503e-06, "loss": 17.8388, "step": 21005 }, { "epoch": 0.38397280055568755, "grad_norm": 6.211908699222352, "learning_rate": 7.058950810504439e-06, "loss": 17.3581, "step": 21006 }, { "epoch": 0.3839910797521341, "grad_norm": 7.248228724588168, "learning_rate": 7.058681057887932e-06, "loss": 17.9582, "step": 21007 }, { "epoch": 0.3840093589485806, "grad_norm": 6.761188584964818, "learning_rate": 7.058411298055925e-06, "loss": 17.8539, "step": 21008 }, { "epoch": 0.38402763814502716, "grad_norm": 6.668129533612861, "learning_rate": 7.058141531009366e-06, "loss": 17.4327, "step": 21009 }, { "epoch": 0.38404591734147364, "grad_norm": 6.720147588197332, "learning_rate": 7.057871756749199e-06, "loss": 17.5705, "step": 21010 }, { "epoch": 0.3840641965379202, "grad_norm": 6.025626867685157, "learning_rate": 7.057601975276372e-06, "loss": 17.2125, "step": 21011 }, { "epoch": 0.3840824757343667, "grad_norm": 6.418059717440586, "learning_rate": 7.057332186591827e-06, "loss": 17.2908, "step": 21012 }, { "epoch": 0.38410075493081325, "grad_norm": 5.208297016391685, "learning_rate": 7.057062390696511e-06, "loss": 16.9275, "step": 21013 }, { "epoch": 0.3841190341272598, "grad_norm": 6.7694717826112605, "learning_rate": 7.05679258759137e-06, "loss": 17.4841, "step": 21014 }, { "epoch": 0.38413731332370626, "grad_norm": 5.318957700949014, "learning_rate": 7.056522777277349e-06, "loss": 16.9798, "step": 21015 }, { "epoch": 0.3841555925201528, "grad_norm": 6.8537494940230745, "learning_rate": 7.056252959755396e-06, "loss": 17.4746, "step": 21016 }, { "epoch": 0.38417387171659934, "grad_norm": 6.9875700824701035, "learning_rate": 7.055983135026454e-06, "loss": 17.7363, "step": 21017 }, { "epoch": 0.38419215091304587, "grad_norm": 6.606386917732638, "learning_rate": 7.055713303091467e-06, "loss": 17.5111, "step": 21018 }, { "epoch": 0.3842104301094924, "grad_norm": 5.8560429944783605, "learning_rate": 7.055443463951386e-06, "loss": 17.2994, "step": 21019 }, { "epoch": 0.3842287093059389, "grad_norm": 5.899752883657581, "learning_rate": 7.0551736176071525e-06, "loss": 17.3462, "step": 21020 }, { "epoch": 0.3842469885023854, "grad_norm": 5.719587758213765, "learning_rate": 7.054903764059716e-06, "loss": 17.4546, "step": 21021 }, { "epoch": 0.38426526769883196, "grad_norm": 6.360485085785115, "learning_rate": 7.054633903310018e-06, "loss": 17.416, "step": 21022 }, { "epoch": 0.3842835468952785, "grad_norm": 6.2388700986330194, "learning_rate": 7.054364035359007e-06, "loss": 17.3875, "step": 21023 }, { "epoch": 0.38430182609172503, "grad_norm": 7.730507789397206, "learning_rate": 7.05409416020763e-06, "loss": 18.2943, "step": 21024 }, { "epoch": 0.3843201052881715, "grad_norm": 7.365054977760105, "learning_rate": 7.053824277856829e-06, "loss": 18.2068, "step": 21025 }, { "epoch": 0.38433838448461805, "grad_norm": 7.222826076162248, "learning_rate": 7.053554388307553e-06, "loss": 17.8555, "step": 21026 }, { "epoch": 0.3843566636810646, "grad_norm": 7.403992404066004, "learning_rate": 7.0532844915607464e-06, "loss": 17.6077, "step": 21027 }, { "epoch": 0.3843749428775111, "grad_norm": 6.646387237157337, "learning_rate": 7.053014587617357e-06, "loss": 17.6762, "step": 21028 }, { "epoch": 0.38439322207395765, "grad_norm": 5.383995280193755, "learning_rate": 7.052744676478329e-06, "loss": 17.0154, "step": 21029 }, { "epoch": 0.38441150127040413, "grad_norm": 5.231586236215032, "learning_rate": 7.05247475814461e-06, "loss": 17.0177, "step": 21030 }, { "epoch": 0.38442978046685067, "grad_norm": 6.022003660024721, "learning_rate": 7.0522048326171446e-06, "loss": 17.5111, "step": 21031 }, { "epoch": 0.3844480596632972, "grad_norm": 6.961967428506571, "learning_rate": 7.051934899896877e-06, "loss": 17.5252, "step": 21032 }, { "epoch": 0.38446633885974374, "grad_norm": 6.121851983504221, "learning_rate": 7.051664959984757e-06, "loss": 17.6957, "step": 21033 }, { "epoch": 0.3844846180561903, "grad_norm": 7.502628993346572, "learning_rate": 7.051395012881732e-06, "loss": 18.1055, "step": 21034 }, { "epoch": 0.38450289725263675, "grad_norm": 6.937053189562557, "learning_rate": 7.051125058588744e-06, "loss": 17.5169, "step": 21035 }, { "epoch": 0.3845211764490833, "grad_norm": 6.425997547010063, "learning_rate": 7.0508550971067395e-06, "loss": 17.5367, "step": 21036 }, { "epoch": 0.3845394556455298, "grad_norm": 7.543885421665368, "learning_rate": 7.050585128436664e-06, "loss": 17.9488, "step": 21037 }, { "epoch": 0.38455773484197636, "grad_norm": 5.393873116929426, "learning_rate": 7.05031515257947e-06, "loss": 17.1293, "step": 21038 }, { "epoch": 0.3845760140384229, "grad_norm": 7.6006476187830305, "learning_rate": 7.0500451695360974e-06, "loss": 18.0129, "step": 21039 }, { "epoch": 0.3845942932348694, "grad_norm": 6.896454748473229, "learning_rate": 7.049775179307494e-06, "loss": 17.8704, "step": 21040 }, { "epoch": 0.3846125724313159, "grad_norm": 7.847156974161075, "learning_rate": 7.049505181894607e-06, "loss": 17.9775, "step": 21041 }, { "epoch": 0.38463085162776245, "grad_norm": 7.167280215081267, "learning_rate": 7.049235177298381e-06, "loss": 17.628, "step": 21042 }, { "epoch": 0.384649130824209, "grad_norm": 4.947757102027752, "learning_rate": 7.048965165519764e-06, "loss": 16.8362, "step": 21043 }, { "epoch": 0.38466741002065546, "grad_norm": 6.1773599780632, "learning_rate": 7.048695146559703e-06, "loss": 17.6304, "step": 21044 }, { "epoch": 0.384685689217102, "grad_norm": 7.392166611133998, "learning_rate": 7.048425120419142e-06, "loss": 17.799, "step": 21045 }, { "epoch": 0.38470396841354854, "grad_norm": 7.947934466412802, "learning_rate": 7.04815508709903e-06, "loss": 17.9185, "step": 21046 }, { "epoch": 0.38472224760999507, "grad_norm": 6.3371784457134375, "learning_rate": 7.047885046600311e-06, "loss": 17.5585, "step": 21047 }, { "epoch": 0.3847405268064416, "grad_norm": 6.35844089344265, "learning_rate": 7.047614998923934e-06, "loss": 17.6056, "step": 21048 }, { "epoch": 0.3847588060028881, "grad_norm": 6.371209181819641, "learning_rate": 7.047344944070843e-06, "loss": 17.772, "step": 21049 }, { "epoch": 0.3847770851993346, "grad_norm": 9.318432427827792, "learning_rate": 7.047074882041986e-06, "loss": 18.2345, "step": 21050 }, { "epoch": 0.38479536439578116, "grad_norm": 6.422751603195068, "learning_rate": 7.046804812838308e-06, "loss": 17.4365, "step": 21051 }, { "epoch": 0.3848136435922277, "grad_norm": 5.859817860410559, "learning_rate": 7.046534736460758e-06, "loss": 17.2159, "step": 21052 }, { "epoch": 0.38483192278867423, "grad_norm": 7.197963911762869, "learning_rate": 7.046264652910282e-06, "loss": 17.7393, "step": 21053 }, { "epoch": 0.3848502019851207, "grad_norm": 7.672084740455577, "learning_rate": 7.045994562187825e-06, "loss": 17.8146, "step": 21054 }, { "epoch": 0.38486848118156725, "grad_norm": 6.565158908516528, "learning_rate": 7.045724464294335e-06, "loss": 17.7477, "step": 21055 }, { "epoch": 0.3848867603780138, "grad_norm": 7.8717063409463055, "learning_rate": 7.045454359230757e-06, "loss": 18.0792, "step": 21056 }, { "epoch": 0.3849050395744603, "grad_norm": 5.79232056568044, "learning_rate": 7.04518424699804e-06, "loss": 17.4529, "step": 21057 }, { "epoch": 0.38492331877090685, "grad_norm": 6.33561194468226, "learning_rate": 7.044914127597131e-06, "loss": 17.6643, "step": 21058 }, { "epoch": 0.38494159796735333, "grad_norm": 6.785048526549523, "learning_rate": 7.044644001028973e-06, "loss": 17.7654, "step": 21059 }, { "epoch": 0.38495987716379987, "grad_norm": 8.698879900998087, "learning_rate": 7.044373867294516e-06, "loss": 18.3851, "step": 21060 }, { "epoch": 0.3849781563602464, "grad_norm": 6.081322171523658, "learning_rate": 7.044103726394706e-06, "loss": 17.4652, "step": 21061 }, { "epoch": 0.38499643555669294, "grad_norm": 7.096132129346076, "learning_rate": 7.04383357833049e-06, "loss": 17.8868, "step": 21062 }, { "epoch": 0.3850147147531395, "grad_norm": 6.499055407332533, "learning_rate": 7.043563423102815e-06, "loss": 17.4621, "step": 21063 }, { "epoch": 0.38503299394958596, "grad_norm": 5.897318297174493, "learning_rate": 7.043293260712627e-06, "loss": 17.4509, "step": 21064 }, { "epoch": 0.3850512731460325, "grad_norm": 7.0679735364353835, "learning_rate": 7.043023091160875e-06, "loss": 17.8111, "step": 21065 }, { "epoch": 0.385069552342479, "grad_norm": 5.8669612247424086, "learning_rate": 7.042752914448502e-06, "loss": 17.293, "step": 21066 }, { "epoch": 0.38508783153892556, "grad_norm": 7.397229059389791, "learning_rate": 7.042482730576459e-06, "loss": 17.7818, "step": 21067 }, { "epoch": 0.3851061107353721, "grad_norm": 6.583045133857047, "learning_rate": 7.04221253954569e-06, "loss": 17.7877, "step": 21068 }, { "epoch": 0.3851243899318186, "grad_norm": 6.318748984029918, "learning_rate": 7.041942341357144e-06, "loss": 17.4063, "step": 21069 }, { "epoch": 0.3851426691282651, "grad_norm": 6.166478127521871, "learning_rate": 7.0416721360117666e-06, "loss": 17.7328, "step": 21070 }, { "epoch": 0.38516094832471165, "grad_norm": 5.73594155656501, "learning_rate": 7.041401923510505e-06, "loss": 17.2638, "step": 21071 }, { "epoch": 0.3851792275211582, "grad_norm": 7.858864102035168, "learning_rate": 7.0411317038543095e-06, "loss": 17.9374, "step": 21072 }, { "epoch": 0.3851975067176047, "grad_norm": 5.148517394924511, "learning_rate": 7.040861477044122e-06, "loss": 16.8221, "step": 21073 }, { "epoch": 0.3852157859140512, "grad_norm": 7.215845543892201, "learning_rate": 7.040591243080893e-06, "loss": 18.0409, "step": 21074 }, { "epoch": 0.38523406511049774, "grad_norm": 7.545392211958562, "learning_rate": 7.040321001965569e-06, "loss": 17.8653, "step": 21075 }, { "epoch": 0.38525234430694427, "grad_norm": 6.430969382890963, "learning_rate": 7.040050753699097e-06, "loss": 17.3215, "step": 21076 }, { "epoch": 0.3852706235033908, "grad_norm": 6.494384680228185, "learning_rate": 7.039780498282422e-06, "loss": 17.5879, "step": 21077 }, { "epoch": 0.3852889026998373, "grad_norm": 6.95596397269451, "learning_rate": 7.039510235716496e-06, "loss": 17.7674, "step": 21078 }, { "epoch": 0.3853071818962838, "grad_norm": 5.629432299225228, "learning_rate": 7.039239966002264e-06, "loss": 17.2038, "step": 21079 }, { "epoch": 0.38532546109273036, "grad_norm": 6.775178413315715, "learning_rate": 7.038969689140671e-06, "loss": 17.683, "step": 21080 }, { "epoch": 0.3853437402891769, "grad_norm": 6.248519359035783, "learning_rate": 7.038699405132668e-06, "loss": 17.2174, "step": 21081 }, { "epoch": 0.38536201948562343, "grad_norm": 6.120305207682711, "learning_rate": 7.0384291139791975e-06, "loss": 17.4106, "step": 21082 }, { "epoch": 0.3853802986820699, "grad_norm": 5.493229917477497, "learning_rate": 7.038158815681213e-06, "loss": 17.1191, "step": 21083 }, { "epoch": 0.38539857787851645, "grad_norm": 7.91059832809418, "learning_rate": 7.037888510239657e-06, "loss": 18.056, "step": 21084 }, { "epoch": 0.385416857074963, "grad_norm": 5.522442157148008, "learning_rate": 7.037618197655479e-06, "loss": 17.2022, "step": 21085 }, { "epoch": 0.3854351362714095, "grad_norm": 6.181231408716889, "learning_rate": 7.037347877929626e-06, "loss": 17.4109, "step": 21086 }, { "epoch": 0.38545341546785605, "grad_norm": 6.892857205373257, "learning_rate": 7.037077551063045e-06, "loss": 17.3751, "step": 21087 }, { "epoch": 0.38547169466430253, "grad_norm": 5.573036233274179, "learning_rate": 7.036807217056685e-06, "loss": 17.1242, "step": 21088 }, { "epoch": 0.38548997386074907, "grad_norm": 5.633173271976434, "learning_rate": 7.036536875911492e-06, "loss": 16.9751, "step": 21089 }, { "epoch": 0.3855082530571956, "grad_norm": 7.079818367249521, "learning_rate": 7.036266527628415e-06, "loss": 18.0067, "step": 21090 }, { "epoch": 0.38552653225364214, "grad_norm": 6.997568015876288, "learning_rate": 7.035996172208398e-06, "loss": 17.6373, "step": 21091 }, { "epoch": 0.3855448114500887, "grad_norm": 7.057284878058664, "learning_rate": 7.035725809652392e-06, "loss": 17.9884, "step": 21092 }, { "epoch": 0.38556309064653516, "grad_norm": 6.205975896431654, "learning_rate": 7.035455439961345e-06, "loss": 17.2431, "step": 21093 }, { "epoch": 0.3855813698429817, "grad_norm": 10.777434619190368, "learning_rate": 7.035185063136203e-06, "loss": 17.7487, "step": 21094 }, { "epoch": 0.3855996490394282, "grad_norm": 7.723381474482577, "learning_rate": 7.034914679177913e-06, "loss": 17.8022, "step": 21095 }, { "epoch": 0.38561792823587476, "grad_norm": 6.358376912339185, "learning_rate": 7.034644288087424e-06, "loss": 17.643, "step": 21096 }, { "epoch": 0.3856362074323213, "grad_norm": 7.280527488684475, "learning_rate": 7.034373889865683e-06, "loss": 17.6818, "step": 21097 }, { "epoch": 0.3856544866287678, "grad_norm": 7.806657770907371, "learning_rate": 7.034103484513639e-06, "loss": 18.6375, "step": 21098 }, { "epoch": 0.3856727658252143, "grad_norm": 6.957910146235624, "learning_rate": 7.033833072032238e-06, "loss": 17.8476, "step": 21099 }, { "epoch": 0.38569104502166085, "grad_norm": 8.754076732392846, "learning_rate": 7.033562652422428e-06, "loss": 18.5968, "step": 21100 }, { "epoch": 0.3857093242181074, "grad_norm": 7.531241236820884, "learning_rate": 7.033292225685159e-06, "loss": 17.4104, "step": 21101 }, { "epoch": 0.3857276034145539, "grad_norm": 5.7801749191855345, "learning_rate": 7.0330217918213765e-06, "loss": 17.1987, "step": 21102 }, { "epoch": 0.3857458826110004, "grad_norm": 5.485070074298556, "learning_rate": 7.03275135083203e-06, "loss": 17.1562, "step": 21103 }, { "epoch": 0.38576416180744694, "grad_norm": 6.9288162598212315, "learning_rate": 7.032480902718064e-06, "loss": 17.8123, "step": 21104 }, { "epoch": 0.3857824410038935, "grad_norm": 6.468959941191855, "learning_rate": 7.03221044748043e-06, "loss": 17.5832, "step": 21105 }, { "epoch": 0.38580072020034, "grad_norm": 7.959014596218402, "learning_rate": 7.0319399851200754e-06, "loss": 17.4631, "step": 21106 }, { "epoch": 0.38581899939678654, "grad_norm": 6.044343220531988, "learning_rate": 7.0316695156379475e-06, "loss": 17.3057, "step": 21107 }, { "epoch": 0.385837278593233, "grad_norm": 8.2256849637106, "learning_rate": 7.031399039034994e-06, "loss": 18.4641, "step": 21108 }, { "epoch": 0.38585555778967956, "grad_norm": 5.699225958830676, "learning_rate": 7.031128555312161e-06, "loss": 17.1867, "step": 21109 }, { "epoch": 0.3858738369861261, "grad_norm": 6.672498573674858, "learning_rate": 7.030858064470402e-06, "loss": 17.3648, "step": 21110 }, { "epoch": 0.38589211618257263, "grad_norm": 4.735959575691783, "learning_rate": 7.03058756651066e-06, "loss": 16.7964, "step": 21111 }, { "epoch": 0.3859103953790191, "grad_norm": 7.371837255640524, "learning_rate": 7.030317061433884e-06, "loss": 17.9339, "step": 21112 }, { "epoch": 0.38592867457546565, "grad_norm": 6.87418183915726, "learning_rate": 7.0300465492410256e-06, "loss": 17.7446, "step": 21113 }, { "epoch": 0.3859469537719122, "grad_norm": 5.61363960267249, "learning_rate": 7.029776029933027e-06, "loss": 17.2526, "step": 21114 }, { "epoch": 0.3859652329683587, "grad_norm": 8.675659705115105, "learning_rate": 7.029505503510842e-06, "loss": 18.4315, "step": 21115 }, { "epoch": 0.38598351216480525, "grad_norm": 6.314659400689751, "learning_rate": 7.029234969975415e-06, "loss": 17.6072, "step": 21116 }, { "epoch": 0.38600179136125173, "grad_norm": 7.281348543329399, "learning_rate": 7.028964429327697e-06, "loss": 17.6171, "step": 21117 }, { "epoch": 0.38602007055769827, "grad_norm": 6.886107038104054, "learning_rate": 7.028693881568632e-06, "loss": 17.8502, "step": 21118 }, { "epoch": 0.3860383497541448, "grad_norm": 6.362316523667407, "learning_rate": 7.028423326699173e-06, "loss": 17.5793, "step": 21119 }, { "epoch": 0.38605662895059134, "grad_norm": 7.459611734937078, "learning_rate": 7.028152764720265e-06, "loss": 17.6337, "step": 21120 }, { "epoch": 0.3860749081470379, "grad_norm": 5.5907760913335265, "learning_rate": 7.027882195632861e-06, "loss": 17.2374, "step": 21121 }, { "epoch": 0.38609318734348436, "grad_norm": 5.973703927348647, "learning_rate": 7.027611619437902e-06, "loss": 17.1285, "step": 21122 }, { "epoch": 0.3861114665399309, "grad_norm": 4.924313880101816, "learning_rate": 7.02734103613634e-06, "loss": 16.8294, "step": 21123 }, { "epoch": 0.3861297457363774, "grad_norm": 7.181231802780088, "learning_rate": 7.027070445729127e-06, "loss": 17.681, "step": 21124 }, { "epoch": 0.38614802493282396, "grad_norm": 6.990090253560784, "learning_rate": 7.026799848217206e-06, "loss": 17.8761, "step": 21125 }, { "epoch": 0.3861663041292705, "grad_norm": 6.184005663463678, "learning_rate": 7.026529243601528e-06, "loss": 17.3393, "step": 21126 }, { "epoch": 0.386184583325717, "grad_norm": 6.267227339306514, "learning_rate": 7.02625863188304e-06, "loss": 17.6497, "step": 21127 }, { "epoch": 0.3862028625221635, "grad_norm": 6.75662883580309, "learning_rate": 7.025988013062691e-06, "loss": 17.5729, "step": 21128 }, { "epoch": 0.38622114171861005, "grad_norm": 9.296670103182908, "learning_rate": 7.025717387141431e-06, "loss": 18.15, "step": 21129 }, { "epoch": 0.3862394209150566, "grad_norm": 4.852905765168992, "learning_rate": 7.025446754120206e-06, "loss": 16.86, "step": 21130 }, { "epoch": 0.3862577001115031, "grad_norm": 5.890617171437006, "learning_rate": 7.0251761139999674e-06, "loss": 17.1843, "step": 21131 }, { "epoch": 0.3862759793079496, "grad_norm": 7.53836892579514, "learning_rate": 7.024905466781662e-06, "loss": 17.8045, "step": 21132 }, { "epoch": 0.38629425850439614, "grad_norm": 7.142241216227472, "learning_rate": 7.0246348124662375e-06, "loss": 17.5758, "step": 21133 }, { "epoch": 0.3863125377008427, "grad_norm": 5.6274541869633925, "learning_rate": 7.024364151054646e-06, "loss": 17.1649, "step": 21134 }, { "epoch": 0.3863308168972892, "grad_norm": 5.0354058954229926, "learning_rate": 7.024093482547831e-06, "loss": 16.9671, "step": 21135 }, { "epoch": 0.38634909609373574, "grad_norm": 6.676414643799981, "learning_rate": 7.023822806946745e-06, "loss": 17.4365, "step": 21136 }, { "epoch": 0.3863673752901822, "grad_norm": 6.334598866258384, "learning_rate": 7.023552124252335e-06, "loss": 17.5582, "step": 21137 }, { "epoch": 0.38638565448662876, "grad_norm": 5.145320711120825, "learning_rate": 7.023281434465553e-06, "loss": 16.9881, "step": 21138 }, { "epoch": 0.3864039336830753, "grad_norm": 8.21784938964477, "learning_rate": 7.0230107375873435e-06, "loss": 17.9658, "step": 21139 }, { "epoch": 0.38642221287952183, "grad_norm": 6.194250296436442, "learning_rate": 7.022740033618657e-06, "loss": 17.2713, "step": 21140 }, { "epoch": 0.38644049207596837, "grad_norm": 7.4350060971970615, "learning_rate": 7.0224693225604415e-06, "loss": 17.8246, "step": 21141 }, { "epoch": 0.38645877127241485, "grad_norm": 6.433020845599466, "learning_rate": 7.022198604413647e-06, "loss": 17.5145, "step": 21142 }, { "epoch": 0.3864770504688614, "grad_norm": 7.612530599796842, "learning_rate": 7.0219278791792225e-06, "loss": 17.9481, "step": 21143 }, { "epoch": 0.3864953296653079, "grad_norm": 6.7636431475299235, "learning_rate": 7.021657146858115e-06, "loss": 17.6575, "step": 21144 }, { "epoch": 0.38651360886175445, "grad_norm": 7.789390469904421, "learning_rate": 7.021386407451276e-06, "loss": 17.4399, "step": 21145 }, { "epoch": 0.38653188805820093, "grad_norm": 5.859613362592341, "learning_rate": 7.021115660959652e-06, "loss": 17.052, "step": 21146 }, { "epoch": 0.38655016725464747, "grad_norm": 7.707885355718643, "learning_rate": 7.020844907384193e-06, "loss": 17.7728, "step": 21147 }, { "epoch": 0.386568446451094, "grad_norm": 6.576837103363616, "learning_rate": 7.020574146725849e-06, "loss": 17.473, "step": 21148 }, { "epoch": 0.38658672564754054, "grad_norm": 6.165204627767476, "learning_rate": 7.020303378985568e-06, "loss": 17.5712, "step": 21149 }, { "epoch": 0.3866050048439871, "grad_norm": 6.642705530824343, "learning_rate": 7.020032604164297e-06, "loss": 17.3808, "step": 21150 }, { "epoch": 0.38662328404043356, "grad_norm": 4.789453489011865, "learning_rate": 7.019761822262988e-06, "loss": 16.6734, "step": 21151 }, { "epoch": 0.3866415632368801, "grad_norm": 7.416221651627426, "learning_rate": 7.019491033282591e-06, "loss": 17.8973, "step": 21152 }, { "epoch": 0.38665984243332663, "grad_norm": 6.346482425134587, "learning_rate": 7.019220237224051e-06, "loss": 17.6587, "step": 21153 }, { "epoch": 0.38667812162977316, "grad_norm": 5.579443119528855, "learning_rate": 7.01894943408832e-06, "loss": 16.9855, "step": 21154 }, { "epoch": 0.3866964008262197, "grad_norm": 6.875463484691944, "learning_rate": 7.018678623876346e-06, "loss": 17.9065, "step": 21155 }, { "epoch": 0.3867146800226662, "grad_norm": 8.57757477707898, "learning_rate": 7.0184078065890785e-06, "loss": 17.675, "step": 21156 }, { "epoch": 0.3867329592191127, "grad_norm": 8.57415771607685, "learning_rate": 7.018136982227467e-06, "loss": 18.3249, "step": 21157 }, { "epoch": 0.38675123841555925, "grad_norm": 8.106472151018053, "learning_rate": 7.017866150792461e-06, "loss": 18.364, "step": 21158 }, { "epoch": 0.3867695176120058, "grad_norm": 5.573386292208885, "learning_rate": 7.017595312285008e-06, "loss": 16.8987, "step": 21159 }, { "epoch": 0.3867877968084523, "grad_norm": 7.469690580880193, "learning_rate": 7.0173244667060606e-06, "loss": 18.009, "step": 21160 }, { "epoch": 0.3868060760048988, "grad_norm": 7.218461469271216, "learning_rate": 7.017053614056564e-06, "loss": 18.0056, "step": 21161 }, { "epoch": 0.38682435520134534, "grad_norm": 6.64697296731165, "learning_rate": 7.016782754337471e-06, "loss": 17.7236, "step": 21162 }, { "epoch": 0.3868426343977919, "grad_norm": 6.469455702339826, "learning_rate": 7.016511887549729e-06, "loss": 17.52, "step": 21163 }, { "epoch": 0.3868609135942384, "grad_norm": 5.620907433957624, "learning_rate": 7.016241013694287e-06, "loss": 17.2523, "step": 21164 }, { "epoch": 0.38687919279068494, "grad_norm": 5.6795585345674375, "learning_rate": 7.015970132772097e-06, "loss": 17.3294, "step": 21165 }, { "epoch": 0.3868974719871314, "grad_norm": 7.020711561287148, "learning_rate": 7.015699244784104e-06, "loss": 17.7057, "step": 21166 }, { "epoch": 0.38691575118357796, "grad_norm": 5.718392094795572, "learning_rate": 7.015428349731261e-06, "loss": 17.165, "step": 21167 }, { "epoch": 0.3869340303800245, "grad_norm": 6.738196681250109, "learning_rate": 7.015157447614518e-06, "loss": 17.3594, "step": 21168 }, { "epoch": 0.38695230957647103, "grad_norm": 6.514408171224374, "learning_rate": 7.014886538434822e-06, "loss": 17.7885, "step": 21169 }, { "epoch": 0.38697058877291757, "grad_norm": 6.240929938726908, "learning_rate": 7.014615622193124e-06, "loss": 17.3936, "step": 21170 }, { "epoch": 0.38698886796936405, "grad_norm": 6.488269955690865, "learning_rate": 7.0143446988903714e-06, "loss": 17.4412, "step": 21171 }, { "epoch": 0.3870071471658106, "grad_norm": 7.510448804413826, "learning_rate": 7.014073768527517e-06, "loss": 17.868, "step": 21172 }, { "epoch": 0.3870254263622571, "grad_norm": 6.568964597754895, "learning_rate": 7.013802831105508e-06, "loss": 17.8103, "step": 21173 }, { "epoch": 0.38704370555870365, "grad_norm": 6.347534864591465, "learning_rate": 7.0135318866252955e-06, "loss": 17.3994, "step": 21174 }, { "epoch": 0.3870619847551502, "grad_norm": 6.076323680890524, "learning_rate": 7.0132609350878285e-06, "loss": 17.266, "step": 21175 }, { "epoch": 0.38708026395159667, "grad_norm": 6.32945252994887, "learning_rate": 7.012989976494057e-06, "loss": 17.3789, "step": 21176 }, { "epoch": 0.3870985431480432, "grad_norm": 5.935308236977482, "learning_rate": 7.012719010844928e-06, "loss": 17.4507, "step": 21177 }, { "epoch": 0.38711682234448974, "grad_norm": 6.854667051747896, "learning_rate": 7.012448038141396e-06, "loss": 17.7286, "step": 21178 }, { "epoch": 0.3871351015409363, "grad_norm": 6.164314235251316, "learning_rate": 7.012177058384408e-06, "loss": 17.3948, "step": 21179 }, { "epoch": 0.38715338073738276, "grad_norm": 6.860808195260509, "learning_rate": 7.0119060715749146e-06, "loss": 17.8231, "step": 21180 }, { "epoch": 0.3871716599338293, "grad_norm": 5.204002291292977, "learning_rate": 7.011635077713863e-06, "loss": 17.0888, "step": 21181 }, { "epoch": 0.38718993913027583, "grad_norm": 5.552994605069955, "learning_rate": 7.0113640768022055e-06, "loss": 17.0881, "step": 21182 }, { "epoch": 0.38720821832672236, "grad_norm": 7.288976439343021, "learning_rate": 7.0110930688408915e-06, "loss": 17.6469, "step": 21183 }, { "epoch": 0.3872264975231689, "grad_norm": 7.121803750697378, "learning_rate": 7.010822053830872e-06, "loss": 17.7485, "step": 21184 }, { "epoch": 0.3872447767196154, "grad_norm": 7.171010355826255, "learning_rate": 7.010551031773094e-06, "loss": 18.1696, "step": 21185 }, { "epoch": 0.3872630559160619, "grad_norm": 6.215532642912796, "learning_rate": 7.01028000266851e-06, "loss": 17.5473, "step": 21186 }, { "epoch": 0.38728133511250845, "grad_norm": 6.009141313573754, "learning_rate": 7.010008966518069e-06, "loss": 17.534, "step": 21187 }, { "epoch": 0.387299614308955, "grad_norm": 7.382995024123719, "learning_rate": 7.009737923322722e-06, "loss": 17.8126, "step": 21188 }, { "epoch": 0.3873178935054015, "grad_norm": 7.008409613622167, "learning_rate": 7.009466873083416e-06, "loss": 17.8726, "step": 21189 }, { "epoch": 0.387336172701848, "grad_norm": 6.326699647289833, "learning_rate": 7.009195815801105e-06, "loss": 17.2901, "step": 21190 }, { "epoch": 0.38735445189829454, "grad_norm": 6.816441338042101, "learning_rate": 7.008924751476734e-06, "loss": 17.8958, "step": 21191 }, { "epoch": 0.3873727310947411, "grad_norm": 6.4113341488070965, "learning_rate": 7.0086536801112595e-06, "loss": 17.5528, "step": 21192 }, { "epoch": 0.3873910102911876, "grad_norm": 6.098204645303003, "learning_rate": 7.008382601705626e-06, "loss": 17.3523, "step": 21193 }, { "epoch": 0.38740928948763415, "grad_norm": 5.224449357427487, "learning_rate": 7.008111516260787e-06, "loss": 17.1505, "step": 21194 }, { "epoch": 0.3874275686840806, "grad_norm": 7.138159137774924, "learning_rate": 7.007840423777691e-06, "loss": 18.0406, "step": 21195 }, { "epoch": 0.38744584788052716, "grad_norm": 7.110189495810454, "learning_rate": 7.0075693242572885e-06, "loss": 17.7023, "step": 21196 }, { "epoch": 0.3874641270769737, "grad_norm": 8.82591569452853, "learning_rate": 7.007298217700529e-06, "loss": 18.3154, "step": 21197 }, { "epoch": 0.38748240627342023, "grad_norm": 9.124803495482377, "learning_rate": 7.0070271041083635e-06, "loss": 18.4117, "step": 21198 }, { "epoch": 0.38750068546986677, "grad_norm": 6.888477199115898, "learning_rate": 7.006755983481744e-06, "loss": 17.9521, "step": 21199 }, { "epoch": 0.38751896466631325, "grad_norm": 7.585323345702857, "learning_rate": 7.006484855821617e-06, "loss": 17.995, "step": 21200 }, { "epoch": 0.3875372438627598, "grad_norm": 5.733917607894738, "learning_rate": 7.0062137211289346e-06, "loss": 17.2265, "step": 21201 }, { "epoch": 0.3875555230592063, "grad_norm": 7.162694575830329, "learning_rate": 7.0059425794046485e-06, "loss": 17.1872, "step": 21202 }, { "epoch": 0.38757380225565286, "grad_norm": 6.972112316308997, "learning_rate": 7.005671430649708e-06, "loss": 17.7391, "step": 21203 }, { "epoch": 0.3875920814520994, "grad_norm": 5.907092817545116, "learning_rate": 7.005400274865062e-06, "loss": 17.129, "step": 21204 }, { "epoch": 0.38761036064854587, "grad_norm": 6.014288258255214, "learning_rate": 7.005129112051662e-06, "loss": 17.1242, "step": 21205 }, { "epoch": 0.3876286398449924, "grad_norm": 7.994252068301885, "learning_rate": 7.004857942210459e-06, "loss": 18.3423, "step": 21206 }, { "epoch": 0.38764691904143894, "grad_norm": 5.43324849083137, "learning_rate": 7.004586765342403e-06, "loss": 17.2776, "step": 21207 }, { "epoch": 0.3876651982378855, "grad_norm": 6.869744228197584, "learning_rate": 7.004315581448444e-06, "loss": 17.8097, "step": 21208 }, { "epoch": 0.387683477434332, "grad_norm": 8.093794571431726, "learning_rate": 7.004044390529532e-06, "loss": 18.0262, "step": 21209 }, { "epoch": 0.3877017566307785, "grad_norm": 6.808434764031609, "learning_rate": 7.00377319258662e-06, "loss": 18.0233, "step": 21210 }, { "epoch": 0.38772003582722503, "grad_norm": 6.4851077505687025, "learning_rate": 7.003501987620655e-06, "loss": 17.6019, "step": 21211 }, { "epoch": 0.38773831502367156, "grad_norm": 5.779791405467095, "learning_rate": 7.003230775632591e-06, "loss": 17.2765, "step": 21212 }, { "epoch": 0.3877565942201181, "grad_norm": 5.9871841761264095, "learning_rate": 7.002959556623376e-06, "loss": 17.2801, "step": 21213 }, { "epoch": 0.3877748734165646, "grad_norm": 8.093114902296954, "learning_rate": 7.002688330593963e-06, "loss": 18.6881, "step": 21214 }, { "epoch": 0.3877931526130111, "grad_norm": 9.376799662022735, "learning_rate": 7.0024170975453e-06, "loss": 19.07, "step": 21215 }, { "epoch": 0.38781143180945765, "grad_norm": 8.387256154251714, "learning_rate": 7.002145857478338e-06, "loss": 18.239, "step": 21216 }, { "epoch": 0.3878297110059042, "grad_norm": 7.62922656084555, "learning_rate": 7.00187461039403e-06, "loss": 18.0096, "step": 21217 }, { "epoch": 0.3878479902023507, "grad_norm": 5.985392864732103, "learning_rate": 7.001603356293325e-06, "loss": 17.3189, "step": 21218 }, { "epoch": 0.3878662693987972, "grad_norm": 7.650112208586723, "learning_rate": 7.001332095177173e-06, "loss": 18.0754, "step": 21219 }, { "epoch": 0.38788454859524374, "grad_norm": 6.035053943900861, "learning_rate": 7.001060827046527e-06, "loss": 17.51, "step": 21220 }, { "epoch": 0.3879028277916903, "grad_norm": 6.216438855753022, "learning_rate": 7.0007895519023364e-06, "loss": 17.2839, "step": 21221 }, { "epoch": 0.3879211069881368, "grad_norm": 5.130572402721616, "learning_rate": 7.000518269745551e-06, "loss": 17.2857, "step": 21222 }, { "epoch": 0.38793938618458335, "grad_norm": 5.96959733512535, "learning_rate": 7.000246980577121e-06, "loss": 17.3203, "step": 21223 }, { "epoch": 0.3879576653810298, "grad_norm": 6.889450543395383, "learning_rate": 6.999975684398002e-06, "loss": 17.8088, "step": 21224 }, { "epoch": 0.38797594457747636, "grad_norm": 6.029957558576468, "learning_rate": 6.9997043812091415e-06, "loss": 17.6056, "step": 21225 }, { "epoch": 0.3879942237739229, "grad_norm": 6.906724005829455, "learning_rate": 6.99943307101149e-06, "loss": 17.6793, "step": 21226 }, { "epoch": 0.38801250297036943, "grad_norm": 5.340520529071106, "learning_rate": 6.999161753805999e-06, "loss": 17.1273, "step": 21227 }, { "epoch": 0.38803078216681597, "grad_norm": 6.47837350521822, "learning_rate": 6.998890429593619e-06, "loss": 17.6201, "step": 21228 }, { "epoch": 0.38804906136326245, "grad_norm": 6.268382853033523, "learning_rate": 6.9986190983753015e-06, "loss": 17.4448, "step": 21229 }, { "epoch": 0.388067340559709, "grad_norm": 5.7812184180378745, "learning_rate": 6.9983477601519975e-06, "loss": 17.2134, "step": 21230 }, { "epoch": 0.3880856197561555, "grad_norm": 7.476410771269091, "learning_rate": 6.9980764149246594e-06, "loss": 17.9308, "step": 21231 }, { "epoch": 0.38810389895260206, "grad_norm": 7.042216423096148, "learning_rate": 6.997805062694235e-06, "loss": 17.8468, "step": 21232 }, { "epoch": 0.3881221781490486, "grad_norm": 7.155437551364444, "learning_rate": 6.997533703461679e-06, "loss": 17.9161, "step": 21233 }, { "epoch": 0.38814045734549507, "grad_norm": 7.6242148961471905, "learning_rate": 6.99726233722794e-06, "loss": 17.8064, "step": 21234 }, { "epoch": 0.3881587365419416, "grad_norm": 6.409742456396555, "learning_rate": 6.996990963993971e-06, "loss": 17.5401, "step": 21235 }, { "epoch": 0.38817701573838814, "grad_norm": 4.970076298313822, "learning_rate": 6.9967195837607184e-06, "loss": 16.9293, "step": 21236 }, { "epoch": 0.3881952949348347, "grad_norm": 6.927960706911375, "learning_rate": 6.99644819652914e-06, "loss": 17.711, "step": 21237 }, { "epoch": 0.3882135741312812, "grad_norm": 7.612483565007718, "learning_rate": 6.996176802300183e-06, "loss": 17.8934, "step": 21238 }, { "epoch": 0.3882318533277277, "grad_norm": 7.882187425633395, "learning_rate": 6.9959054010748e-06, "loss": 17.8482, "step": 21239 }, { "epoch": 0.38825013252417423, "grad_norm": 9.240745684312687, "learning_rate": 6.9956339928539406e-06, "loss": 18.8445, "step": 21240 }, { "epoch": 0.38826841172062077, "grad_norm": 7.712105683195305, "learning_rate": 6.995362577638558e-06, "loss": 17.8008, "step": 21241 }, { "epoch": 0.3882866909170673, "grad_norm": 10.050560138755184, "learning_rate": 6.995091155429603e-06, "loss": 18.0613, "step": 21242 }, { "epoch": 0.38830497011351384, "grad_norm": 6.018862137503063, "learning_rate": 6.994819726228026e-06, "loss": 17.3277, "step": 21243 }, { "epoch": 0.3883232493099603, "grad_norm": 6.199811183230868, "learning_rate": 6.994548290034779e-06, "loss": 17.444, "step": 21244 }, { "epoch": 0.38834152850640685, "grad_norm": 6.255313750780701, "learning_rate": 6.9942768468508134e-06, "loss": 17.5624, "step": 21245 }, { "epoch": 0.3883598077028534, "grad_norm": 5.6463831568429645, "learning_rate": 6.994005396677082e-06, "loss": 16.9848, "step": 21246 }, { "epoch": 0.3883780868992999, "grad_norm": 6.679686702760674, "learning_rate": 6.993733939514534e-06, "loss": 17.4929, "step": 21247 }, { "epoch": 0.3883963660957464, "grad_norm": 6.9273344663011285, "learning_rate": 6.993462475364121e-06, "loss": 17.7167, "step": 21248 }, { "epoch": 0.38841464529219294, "grad_norm": 6.811534633973879, "learning_rate": 6.993191004226795e-06, "loss": 17.6195, "step": 21249 }, { "epoch": 0.3884329244886395, "grad_norm": 6.014366310528886, "learning_rate": 6.992919526103507e-06, "loss": 17.5811, "step": 21250 }, { "epoch": 0.388451203685086, "grad_norm": 6.22316949754385, "learning_rate": 6.992648040995209e-06, "loss": 17.4077, "step": 21251 }, { "epoch": 0.38846948288153255, "grad_norm": 5.558124965916242, "learning_rate": 6.9923765489028535e-06, "loss": 17.3147, "step": 21252 }, { "epoch": 0.388487762077979, "grad_norm": 6.249392869860253, "learning_rate": 6.9921050498273915e-06, "loss": 17.476, "step": 21253 }, { "epoch": 0.38850604127442556, "grad_norm": 7.6245737784902055, "learning_rate": 6.9918335437697725e-06, "loss": 17.9288, "step": 21254 }, { "epoch": 0.3885243204708721, "grad_norm": 6.749996569070155, "learning_rate": 6.99156203073095e-06, "loss": 17.5503, "step": 21255 }, { "epoch": 0.38854259966731863, "grad_norm": 6.65555231658176, "learning_rate": 6.991290510711877e-06, "loss": 17.3065, "step": 21256 }, { "epoch": 0.38856087886376517, "grad_norm": 5.966298807431138, "learning_rate": 6.991018983713502e-06, "loss": 17.3712, "step": 21257 }, { "epoch": 0.38857915806021165, "grad_norm": 6.940957382615158, "learning_rate": 6.990747449736779e-06, "loss": 17.5398, "step": 21258 }, { "epoch": 0.3885974372566582, "grad_norm": 7.656777098521687, "learning_rate": 6.990475908782659e-06, "loss": 17.7731, "step": 21259 }, { "epoch": 0.3886157164531047, "grad_norm": 5.286748892777709, "learning_rate": 6.990204360852093e-06, "loss": 17.0508, "step": 21260 }, { "epoch": 0.38863399564955126, "grad_norm": 5.743004171762552, "learning_rate": 6.989932805946033e-06, "loss": 17.3825, "step": 21261 }, { "epoch": 0.3886522748459978, "grad_norm": 7.176283377532258, "learning_rate": 6.989661244065433e-06, "loss": 17.9225, "step": 21262 }, { "epoch": 0.38867055404244427, "grad_norm": 5.632309989099968, "learning_rate": 6.989389675211241e-06, "loss": 17.1852, "step": 21263 }, { "epoch": 0.3886888332388908, "grad_norm": 6.291288617650993, "learning_rate": 6.989118099384412e-06, "loss": 17.4721, "step": 21264 }, { "epoch": 0.38870711243533734, "grad_norm": 5.919406410920219, "learning_rate": 6.9888465165858974e-06, "loss": 17.304, "step": 21265 }, { "epoch": 0.3887253916317839, "grad_norm": 7.682794152487566, "learning_rate": 6.988574926816647e-06, "loss": 18.1165, "step": 21266 }, { "epoch": 0.3887436708282304, "grad_norm": 5.4366959645772654, "learning_rate": 6.988303330077615e-06, "loss": 17.1581, "step": 21267 }, { "epoch": 0.3887619500246769, "grad_norm": 7.521380821342717, "learning_rate": 6.988031726369751e-06, "loss": 17.8559, "step": 21268 }, { "epoch": 0.38878022922112343, "grad_norm": 5.808104123312755, "learning_rate": 6.987760115694009e-06, "loss": 17.1664, "step": 21269 }, { "epoch": 0.38879850841756997, "grad_norm": 5.958179556614933, "learning_rate": 6.987488498051341e-06, "loss": 17.3527, "step": 21270 }, { "epoch": 0.3888167876140165, "grad_norm": 7.133793472041586, "learning_rate": 6.987216873442697e-06, "loss": 17.6617, "step": 21271 }, { "epoch": 0.38883506681046304, "grad_norm": 8.031273334361943, "learning_rate": 6.986945241869032e-06, "loss": 18.4279, "step": 21272 }, { "epoch": 0.3888533460069095, "grad_norm": 8.503223922463164, "learning_rate": 6.986673603331295e-06, "loss": 17.3892, "step": 21273 }, { "epoch": 0.38887162520335605, "grad_norm": 5.965962258022024, "learning_rate": 6.9864019578304395e-06, "loss": 17.3279, "step": 21274 }, { "epoch": 0.3888899043998026, "grad_norm": 6.595645195422536, "learning_rate": 6.9861303053674175e-06, "loss": 17.6504, "step": 21275 }, { "epoch": 0.3889081835962491, "grad_norm": 7.262360912825414, "learning_rate": 6.985858645943182e-06, "loss": 17.438, "step": 21276 }, { "epoch": 0.38892646279269566, "grad_norm": 6.147591248388254, "learning_rate": 6.9855869795586826e-06, "loss": 17.2427, "step": 21277 }, { "epoch": 0.38894474198914214, "grad_norm": 5.633766962180324, "learning_rate": 6.9853153062148746e-06, "loss": 17.1198, "step": 21278 }, { "epoch": 0.3889630211855887, "grad_norm": 6.725508579829761, "learning_rate": 6.9850436259127096e-06, "loss": 17.775, "step": 21279 }, { "epoch": 0.3889813003820352, "grad_norm": 5.49034085767427, "learning_rate": 6.984771938653138e-06, "loss": 17.1014, "step": 21280 }, { "epoch": 0.38899957957848175, "grad_norm": 7.925168542674946, "learning_rate": 6.984500244437112e-06, "loss": 18.0358, "step": 21281 }, { "epoch": 0.3890178587749282, "grad_norm": 8.082796156974405, "learning_rate": 6.9842285432655845e-06, "loss": 18.4971, "step": 21282 }, { "epoch": 0.38903613797137476, "grad_norm": 7.126770318695678, "learning_rate": 6.9839568351395095e-06, "loss": 17.8173, "step": 21283 }, { "epoch": 0.3890544171678213, "grad_norm": 5.307411454487401, "learning_rate": 6.983685120059838e-06, "loss": 17.0083, "step": 21284 }, { "epoch": 0.38907269636426783, "grad_norm": 6.385024001756688, "learning_rate": 6.983413398027522e-06, "loss": 17.6907, "step": 21285 }, { "epoch": 0.38909097556071437, "grad_norm": 7.627134498652095, "learning_rate": 6.983141669043514e-06, "loss": 17.709, "step": 21286 }, { "epoch": 0.38910925475716085, "grad_norm": 7.2880895484751695, "learning_rate": 6.982869933108766e-06, "loss": 17.7053, "step": 21287 }, { "epoch": 0.3891275339536074, "grad_norm": 5.2131421028496865, "learning_rate": 6.982598190224233e-06, "loss": 17.0961, "step": 21288 }, { "epoch": 0.3891458131500539, "grad_norm": 5.955602767712263, "learning_rate": 6.982326440390863e-06, "loss": 17.3809, "step": 21289 }, { "epoch": 0.38916409234650046, "grad_norm": 6.879215689190057, "learning_rate": 6.982054683609613e-06, "loss": 17.786, "step": 21290 }, { "epoch": 0.389182371542947, "grad_norm": 6.564078461155945, "learning_rate": 6.98178291988143e-06, "loss": 17.4708, "step": 21291 }, { "epoch": 0.38920065073939347, "grad_norm": 7.3157794080072795, "learning_rate": 6.981511149207272e-06, "loss": 17.9549, "step": 21292 }, { "epoch": 0.38921892993584, "grad_norm": 7.74486846707566, "learning_rate": 6.981239371588091e-06, "loss": 18.1465, "step": 21293 }, { "epoch": 0.38923720913228654, "grad_norm": 6.72366683820151, "learning_rate": 6.980967587024836e-06, "loss": 17.9271, "step": 21294 }, { "epoch": 0.3892554883287331, "grad_norm": 5.436869165037432, "learning_rate": 6.980695795518462e-06, "loss": 17.1761, "step": 21295 }, { "epoch": 0.3892737675251796, "grad_norm": 6.483646507250865, "learning_rate": 6.980423997069921e-06, "loss": 17.6019, "step": 21296 }, { "epoch": 0.3892920467216261, "grad_norm": 5.810803759790037, "learning_rate": 6.980152191680165e-06, "loss": 17.2279, "step": 21297 }, { "epoch": 0.38931032591807263, "grad_norm": 8.216575660666162, "learning_rate": 6.979880379350148e-06, "loss": 18.4252, "step": 21298 }, { "epoch": 0.38932860511451917, "grad_norm": 6.958361073056182, "learning_rate": 6.979608560080822e-06, "loss": 17.8733, "step": 21299 }, { "epoch": 0.3893468843109657, "grad_norm": 6.095786815181945, "learning_rate": 6.979336733873139e-06, "loss": 17.3995, "step": 21300 }, { "epoch": 0.38936516350741224, "grad_norm": 6.236943758539881, "learning_rate": 6.9790649007280544e-06, "loss": 17.507, "step": 21301 }, { "epoch": 0.3893834427038587, "grad_norm": 7.616428036504893, "learning_rate": 6.978793060646517e-06, "loss": 17.8563, "step": 21302 }, { "epoch": 0.38940172190030525, "grad_norm": 7.625061378673789, "learning_rate": 6.9785212136294835e-06, "loss": 18.0646, "step": 21303 }, { "epoch": 0.3894200010967518, "grad_norm": 7.409375462993235, "learning_rate": 6.978249359677903e-06, "loss": 17.8779, "step": 21304 }, { "epoch": 0.3894382802931983, "grad_norm": 6.573970239933467, "learning_rate": 6.977977498792732e-06, "loss": 17.907, "step": 21305 }, { "epoch": 0.38945655948964486, "grad_norm": 8.660433654350012, "learning_rate": 6.97770563097492e-06, "loss": 18.4487, "step": 21306 }, { "epoch": 0.38947483868609134, "grad_norm": 8.063613108737854, "learning_rate": 6.977433756225422e-06, "loss": 18.5591, "step": 21307 }, { "epoch": 0.3894931178825379, "grad_norm": 11.406531230982122, "learning_rate": 6.9771618745451905e-06, "loss": 18.5781, "step": 21308 }, { "epoch": 0.3895113970789844, "grad_norm": 6.6462230803461875, "learning_rate": 6.976889985935178e-06, "loss": 17.8404, "step": 21309 }, { "epoch": 0.38952967627543095, "grad_norm": 7.66327701944068, "learning_rate": 6.976618090396339e-06, "loss": 18.0304, "step": 21310 }, { "epoch": 0.3895479554718775, "grad_norm": 6.727846858984811, "learning_rate": 6.976346187929623e-06, "loss": 17.8418, "step": 21311 }, { "epoch": 0.38956623466832396, "grad_norm": 8.182694833695164, "learning_rate": 6.976074278535986e-06, "loss": 18.2975, "step": 21312 }, { "epoch": 0.3895845138647705, "grad_norm": 6.926322785438558, "learning_rate": 6.97580236221638e-06, "loss": 17.9299, "step": 21313 }, { "epoch": 0.38960279306121703, "grad_norm": 8.550408556159267, "learning_rate": 6.975530438971759e-06, "loss": 18.1579, "step": 21314 }, { "epoch": 0.38962107225766357, "grad_norm": 6.021824361613114, "learning_rate": 6.975258508803073e-06, "loss": 17.5287, "step": 21315 }, { "epoch": 0.38963935145411005, "grad_norm": 6.385797779918753, "learning_rate": 6.974986571711279e-06, "loss": 17.7935, "step": 21316 }, { "epoch": 0.3896576306505566, "grad_norm": 5.637782107208922, "learning_rate": 6.9747146276973285e-06, "loss": 17.0898, "step": 21317 }, { "epoch": 0.3896759098470031, "grad_norm": 5.861024354538965, "learning_rate": 6.9744426767621745e-06, "loss": 17.3149, "step": 21318 }, { "epoch": 0.38969418904344966, "grad_norm": 7.489401160585184, "learning_rate": 6.97417071890677e-06, "loss": 18.1764, "step": 21319 }, { "epoch": 0.3897124682398962, "grad_norm": 6.662047481926622, "learning_rate": 6.973898754132068e-06, "loss": 17.5862, "step": 21320 }, { "epoch": 0.3897307474363427, "grad_norm": 6.698633197460809, "learning_rate": 6.9736267824390235e-06, "loss": 17.4519, "step": 21321 }, { "epoch": 0.3897490266327892, "grad_norm": 5.485635347009831, "learning_rate": 6.973354803828587e-06, "loss": 17.1254, "step": 21322 }, { "epoch": 0.38976730582923574, "grad_norm": 7.2791840642705035, "learning_rate": 6.973082818301713e-06, "loss": 17.8611, "step": 21323 }, { "epoch": 0.3897855850256823, "grad_norm": 6.988091609826114, "learning_rate": 6.972810825859357e-06, "loss": 17.9001, "step": 21324 }, { "epoch": 0.3898038642221288, "grad_norm": 7.631402227675237, "learning_rate": 6.972538826502468e-06, "loss": 17.6979, "step": 21325 }, { "epoch": 0.3898221434185753, "grad_norm": 6.259318480862302, "learning_rate": 6.972266820232002e-06, "loss": 17.4767, "step": 21326 }, { "epoch": 0.38984042261502183, "grad_norm": 6.440930814892935, "learning_rate": 6.971994807048913e-06, "loss": 17.3391, "step": 21327 }, { "epoch": 0.38985870181146837, "grad_norm": 7.124678437374745, "learning_rate": 6.971722786954153e-06, "loss": 17.4527, "step": 21328 }, { "epoch": 0.3898769810079149, "grad_norm": 6.218475416507136, "learning_rate": 6.971450759948675e-06, "loss": 17.5786, "step": 21329 }, { "epoch": 0.38989526020436144, "grad_norm": 6.110012833418725, "learning_rate": 6.971178726033434e-06, "loss": 17.4619, "step": 21330 }, { "epoch": 0.3899135394008079, "grad_norm": 5.964802041121985, "learning_rate": 6.970906685209382e-06, "loss": 17.4164, "step": 21331 }, { "epoch": 0.38993181859725445, "grad_norm": 8.95644489526695, "learning_rate": 6.9706346374774725e-06, "loss": 18.7085, "step": 21332 }, { "epoch": 0.389950097793701, "grad_norm": 6.611856409956911, "learning_rate": 6.970362582838661e-06, "loss": 17.3819, "step": 21333 }, { "epoch": 0.3899683769901475, "grad_norm": 8.3973359875057, "learning_rate": 6.9700905212938995e-06, "loss": 18.4749, "step": 21334 }, { "epoch": 0.38998665618659406, "grad_norm": 7.78691454636137, "learning_rate": 6.969818452844141e-06, "loss": 17.9705, "step": 21335 }, { "epoch": 0.39000493538304054, "grad_norm": 7.820378788277538, "learning_rate": 6.96954637749034e-06, "loss": 18.2859, "step": 21336 }, { "epoch": 0.3900232145794871, "grad_norm": 7.308608328921452, "learning_rate": 6.969274295233449e-06, "loss": 17.9314, "step": 21337 }, { "epoch": 0.3900414937759336, "grad_norm": 5.854718803924013, "learning_rate": 6.969002206074425e-06, "loss": 17.4876, "step": 21338 }, { "epoch": 0.39005977297238015, "grad_norm": 7.528831837381876, "learning_rate": 6.968730110014217e-06, "loss": 17.8921, "step": 21339 }, { "epoch": 0.3900780521688267, "grad_norm": 5.982696355296637, "learning_rate": 6.968458007053781e-06, "loss": 17.5032, "step": 21340 }, { "epoch": 0.39009633136527316, "grad_norm": 5.648254364233851, "learning_rate": 6.968185897194071e-06, "loss": 17.1825, "step": 21341 }, { "epoch": 0.3901146105617197, "grad_norm": 7.828135749879042, "learning_rate": 6.96791378043604e-06, "loss": 17.9979, "step": 21342 }, { "epoch": 0.39013288975816623, "grad_norm": 5.338550953651234, "learning_rate": 6.967641656780641e-06, "loss": 16.9122, "step": 21343 }, { "epoch": 0.39015116895461277, "grad_norm": 6.0180414000488165, "learning_rate": 6.9673695262288295e-06, "loss": 17.5922, "step": 21344 }, { "epoch": 0.3901694481510593, "grad_norm": 6.810479249087184, "learning_rate": 6.967097388781558e-06, "loss": 17.3728, "step": 21345 }, { "epoch": 0.3901877273475058, "grad_norm": 6.766345601358699, "learning_rate": 6.9668252444397825e-06, "loss": 17.5683, "step": 21346 }, { "epoch": 0.3902060065439523, "grad_norm": 5.059032446363334, "learning_rate": 6.966553093204455e-06, "loss": 17.015, "step": 21347 }, { "epoch": 0.39022428574039886, "grad_norm": 6.346645110894162, "learning_rate": 6.966280935076529e-06, "loss": 17.3645, "step": 21348 }, { "epoch": 0.3902425649368454, "grad_norm": 7.672018816807479, "learning_rate": 6.966008770056959e-06, "loss": 18.1503, "step": 21349 }, { "epoch": 0.3902608441332919, "grad_norm": 7.026915302430959, "learning_rate": 6.965736598146698e-06, "loss": 18.068, "step": 21350 }, { "epoch": 0.3902791233297384, "grad_norm": 6.5049983056414495, "learning_rate": 6.9654644193467e-06, "loss": 17.5476, "step": 21351 }, { "epoch": 0.39029740252618494, "grad_norm": 8.315180938519537, "learning_rate": 6.965192233657923e-06, "loss": 17.9425, "step": 21352 }, { "epoch": 0.3903156817226315, "grad_norm": 6.170959138669452, "learning_rate": 6.964920041081315e-06, "loss": 17.2836, "step": 21353 }, { "epoch": 0.390333960919078, "grad_norm": 6.832621635727959, "learning_rate": 6.964647841617834e-06, "loss": 18.0796, "step": 21354 }, { "epoch": 0.3903522401155245, "grad_norm": 6.560517745910997, "learning_rate": 6.964375635268432e-06, "loss": 17.8781, "step": 21355 }, { "epoch": 0.39037051931197103, "grad_norm": 5.484001868073417, "learning_rate": 6.964103422034065e-06, "loss": 17.0136, "step": 21356 }, { "epoch": 0.39038879850841757, "grad_norm": 5.99839690517561, "learning_rate": 6.963831201915685e-06, "loss": 17.2651, "step": 21357 }, { "epoch": 0.3904070777048641, "grad_norm": 6.872484539048409, "learning_rate": 6.963558974914248e-06, "loss": 17.5205, "step": 21358 }, { "epoch": 0.39042535690131064, "grad_norm": 5.506546562774649, "learning_rate": 6.963286741030706e-06, "loss": 16.9392, "step": 21359 }, { "epoch": 0.3904436360977571, "grad_norm": 6.476657809631774, "learning_rate": 6.963014500266015e-06, "loss": 17.3503, "step": 21360 }, { "epoch": 0.39046191529420365, "grad_norm": 5.417844657813247, "learning_rate": 6.962742252621128e-06, "loss": 17.1027, "step": 21361 }, { "epoch": 0.3904801944906502, "grad_norm": 6.559373381981851, "learning_rate": 6.962469998097001e-06, "loss": 17.842, "step": 21362 }, { "epoch": 0.3904984736870967, "grad_norm": 7.3015999342370455, "learning_rate": 6.962197736694585e-06, "loss": 17.8833, "step": 21363 }, { "epoch": 0.39051675288354326, "grad_norm": 5.2950704084144, "learning_rate": 6.961925468414838e-06, "loss": 17.1602, "step": 21364 }, { "epoch": 0.39053503207998974, "grad_norm": 6.701239897971625, "learning_rate": 6.9616531932587115e-06, "loss": 17.4443, "step": 21365 }, { "epoch": 0.3905533112764363, "grad_norm": 6.224332576366943, "learning_rate": 6.961380911227161e-06, "loss": 17.3525, "step": 21366 }, { "epoch": 0.3905715904728828, "grad_norm": 6.300876541429538, "learning_rate": 6.961108622321141e-06, "loss": 17.5501, "step": 21367 }, { "epoch": 0.39058986966932935, "grad_norm": 4.867193414597315, "learning_rate": 6.960836326541605e-06, "loss": 16.8688, "step": 21368 }, { "epoch": 0.3906081488657759, "grad_norm": 6.014989360490721, "learning_rate": 6.960564023889508e-06, "loss": 17.4641, "step": 21369 }, { "epoch": 0.39062642806222236, "grad_norm": 6.288902481815039, "learning_rate": 6.960291714365804e-06, "loss": 17.2468, "step": 21370 }, { "epoch": 0.3906447072586689, "grad_norm": 7.155918954696706, "learning_rate": 6.960019397971448e-06, "loss": 17.9502, "step": 21371 }, { "epoch": 0.39066298645511544, "grad_norm": 8.354754346094992, "learning_rate": 6.9597470747073936e-06, "loss": 18.3131, "step": 21372 }, { "epoch": 0.39068126565156197, "grad_norm": 5.953499741152448, "learning_rate": 6.959474744574596e-06, "loss": 16.9842, "step": 21373 }, { "epoch": 0.3906995448480085, "grad_norm": 6.550937126367873, "learning_rate": 6.95920240757401e-06, "loss": 17.5067, "step": 21374 }, { "epoch": 0.390717824044455, "grad_norm": 6.852875182419491, "learning_rate": 6.958930063706588e-06, "loss": 17.9154, "step": 21375 }, { "epoch": 0.3907361032409015, "grad_norm": 6.767881163629693, "learning_rate": 6.958657712973289e-06, "loss": 17.5089, "step": 21376 }, { "epoch": 0.39075438243734806, "grad_norm": 6.7261822161174045, "learning_rate": 6.958385355375062e-06, "loss": 17.4858, "step": 21377 }, { "epoch": 0.3907726616337946, "grad_norm": 5.7532822623251345, "learning_rate": 6.958112990912865e-06, "loss": 17.4299, "step": 21378 }, { "epoch": 0.39079094083024113, "grad_norm": 6.290265203103681, "learning_rate": 6.957840619587653e-06, "loss": 17.6446, "step": 21379 }, { "epoch": 0.3908092200266876, "grad_norm": 7.53805699223157, "learning_rate": 6.957568241400378e-06, "loss": 18.0118, "step": 21380 }, { "epoch": 0.39082749922313414, "grad_norm": 6.14553882754755, "learning_rate": 6.957295856351997e-06, "loss": 17.5025, "step": 21381 }, { "epoch": 0.3908457784195807, "grad_norm": 6.223444201479475, "learning_rate": 6.957023464443462e-06, "loss": 17.4475, "step": 21382 }, { "epoch": 0.3908640576160272, "grad_norm": 6.902839466662411, "learning_rate": 6.956751065675732e-06, "loss": 17.7503, "step": 21383 }, { "epoch": 0.3908823368124737, "grad_norm": 6.001556486376833, "learning_rate": 6.956478660049759e-06, "loss": 17.3742, "step": 21384 }, { "epoch": 0.39090061600892023, "grad_norm": 7.430794110392538, "learning_rate": 6.956206247566497e-06, "loss": 18.3194, "step": 21385 }, { "epoch": 0.39091889520536677, "grad_norm": 6.880564037341758, "learning_rate": 6.955933828226903e-06, "loss": 17.5664, "step": 21386 }, { "epoch": 0.3909371744018133, "grad_norm": 6.151128109271224, "learning_rate": 6.95566140203193e-06, "loss": 17.5719, "step": 21387 }, { "epoch": 0.39095545359825984, "grad_norm": 6.9677245026585215, "learning_rate": 6.955388968982533e-06, "loss": 17.7612, "step": 21388 }, { "epoch": 0.3909737327947063, "grad_norm": 6.084702929661392, "learning_rate": 6.955116529079668e-06, "loss": 17.3963, "step": 21389 }, { "epoch": 0.39099201199115285, "grad_norm": 5.680522535403873, "learning_rate": 6.9548440823242894e-06, "loss": 17.2712, "step": 21390 }, { "epoch": 0.3910102911875994, "grad_norm": 6.214114351366993, "learning_rate": 6.954571628717352e-06, "loss": 17.3423, "step": 21391 }, { "epoch": 0.3910285703840459, "grad_norm": 6.662398299688071, "learning_rate": 6.95429916825981e-06, "loss": 17.4247, "step": 21392 }, { "epoch": 0.39104684958049246, "grad_norm": 8.062051943231458, "learning_rate": 6.9540267009526195e-06, "loss": 18.1852, "step": 21393 }, { "epoch": 0.39106512877693894, "grad_norm": 6.170123012231771, "learning_rate": 6.953754226796735e-06, "loss": 17.4669, "step": 21394 }, { "epoch": 0.3910834079733855, "grad_norm": 6.779457287130032, "learning_rate": 6.9534817457931106e-06, "loss": 17.3266, "step": 21395 }, { "epoch": 0.391101687169832, "grad_norm": 6.6416770898103845, "learning_rate": 6.953209257942703e-06, "loss": 17.7326, "step": 21396 }, { "epoch": 0.39111996636627855, "grad_norm": 5.499987273354564, "learning_rate": 6.9529367632464675e-06, "loss": 17.1806, "step": 21397 }, { "epoch": 0.3911382455627251, "grad_norm": 6.564647982097723, "learning_rate": 6.952664261705357e-06, "loss": 17.5584, "step": 21398 }, { "epoch": 0.39115652475917156, "grad_norm": 4.950783792128595, "learning_rate": 6.9523917533203264e-06, "loss": 16.7491, "step": 21399 }, { "epoch": 0.3911748039556181, "grad_norm": 5.412107465896945, "learning_rate": 6.952119238092334e-06, "loss": 17.1481, "step": 21400 }, { "epoch": 0.39119308315206464, "grad_norm": 6.919772189678604, "learning_rate": 6.951846716022333e-06, "loss": 17.8896, "step": 21401 }, { "epoch": 0.39121136234851117, "grad_norm": 6.4788537182646175, "learning_rate": 6.9515741871112765e-06, "loss": 17.4922, "step": 21402 }, { "epoch": 0.3912296415449577, "grad_norm": 6.40365381631644, "learning_rate": 6.951301651360125e-06, "loss": 17.3484, "step": 21403 }, { "epoch": 0.3912479207414042, "grad_norm": 10.072362100586338, "learning_rate": 6.951029108769828e-06, "loss": 17.8765, "step": 21404 }, { "epoch": 0.3912661999378507, "grad_norm": 7.4249881377671265, "learning_rate": 6.950756559341344e-06, "loss": 17.793, "step": 21405 }, { "epoch": 0.39128447913429726, "grad_norm": 5.574785088089333, "learning_rate": 6.950484003075627e-06, "loss": 17.0611, "step": 21406 }, { "epoch": 0.3913027583307438, "grad_norm": 8.11320467579111, "learning_rate": 6.950211439973635e-06, "loss": 17.8801, "step": 21407 }, { "epoch": 0.39132103752719033, "grad_norm": 5.957545539376082, "learning_rate": 6.949938870036319e-06, "loss": 17.5233, "step": 21408 }, { "epoch": 0.3913393167236368, "grad_norm": 8.042419256128051, "learning_rate": 6.949666293264636e-06, "loss": 18.249, "step": 21409 }, { "epoch": 0.39135759592008335, "grad_norm": 6.69497399498759, "learning_rate": 6.949393709659545e-06, "loss": 17.7397, "step": 21410 }, { "epoch": 0.3913758751165299, "grad_norm": 5.444913876067578, "learning_rate": 6.949121119221996e-06, "loss": 17.1343, "step": 21411 }, { "epoch": 0.3913941543129764, "grad_norm": 6.117132649027419, "learning_rate": 6.948848521952946e-06, "loss": 17.2501, "step": 21412 }, { "epoch": 0.39141243350942295, "grad_norm": 7.002488487919857, "learning_rate": 6.948575917853353e-06, "loss": 17.3118, "step": 21413 }, { "epoch": 0.39143071270586943, "grad_norm": 7.05743563768558, "learning_rate": 6.948303306924169e-06, "loss": 17.9495, "step": 21414 }, { "epoch": 0.39144899190231597, "grad_norm": 6.8819713184902245, "learning_rate": 6.9480306891663506e-06, "loss": 17.7145, "step": 21415 }, { "epoch": 0.3914672710987625, "grad_norm": 7.144076480747118, "learning_rate": 6.947758064580854e-06, "loss": 17.8562, "step": 21416 }, { "epoch": 0.39148555029520904, "grad_norm": 5.910437964592411, "learning_rate": 6.947485433168637e-06, "loss": 17.4718, "step": 21417 }, { "epoch": 0.3915038294916555, "grad_norm": 7.346846741473479, "learning_rate": 6.947212794930649e-06, "loss": 17.8076, "step": 21418 }, { "epoch": 0.39152210868810206, "grad_norm": 5.877024921883323, "learning_rate": 6.94694014986785e-06, "loss": 17.3449, "step": 21419 }, { "epoch": 0.3915403878845486, "grad_norm": 7.500077927696738, "learning_rate": 6.946667497981195e-06, "loss": 17.7087, "step": 21420 }, { "epoch": 0.3915586670809951, "grad_norm": 6.33079279859324, "learning_rate": 6.946394839271641e-06, "loss": 17.1952, "step": 21421 }, { "epoch": 0.39157694627744166, "grad_norm": 7.604093867593265, "learning_rate": 6.946122173740139e-06, "loss": 18.1463, "step": 21422 }, { "epoch": 0.39159522547388814, "grad_norm": 6.961778367265984, "learning_rate": 6.945849501387649e-06, "loss": 17.9901, "step": 21423 }, { "epoch": 0.3916135046703347, "grad_norm": 8.235848893700775, "learning_rate": 6.945576822215127e-06, "loss": 18.8584, "step": 21424 }, { "epoch": 0.3916317838667812, "grad_norm": 7.0501785748067345, "learning_rate": 6.945304136223525e-06, "loss": 17.8166, "step": 21425 }, { "epoch": 0.39165006306322775, "grad_norm": 7.336789474080342, "learning_rate": 6.945031443413801e-06, "loss": 18.0062, "step": 21426 }, { "epoch": 0.3916683422596743, "grad_norm": 8.57694652541582, "learning_rate": 6.944758743786912e-06, "loss": 18.2202, "step": 21427 }, { "epoch": 0.39168662145612076, "grad_norm": 6.263094385257538, "learning_rate": 6.944486037343812e-06, "loss": 17.2668, "step": 21428 }, { "epoch": 0.3917049006525673, "grad_norm": 5.130306232587203, "learning_rate": 6.944213324085456e-06, "loss": 16.9621, "step": 21429 }, { "epoch": 0.39172317984901384, "grad_norm": 6.770502139624869, "learning_rate": 6.943940604012801e-06, "loss": 17.6095, "step": 21430 }, { "epoch": 0.39174145904546037, "grad_norm": 6.802158937988266, "learning_rate": 6.943667877126803e-06, "loss": 17.7199, "step": 21431 }, { "epoch": 0.3917597382419069, "grad_norm": 9.665343205258099, "learning_rate": 6.943395143428418e-06, "loss": 17.8363, "step": 21432 }, { "epoch": 0.3917780174383534, "grad_norm": 4.94746228459059, "learning_rate": 6.943122402918603e-06, "loss": 16.8839, "step": 21433 }, { "epoch": 0.3917962966347999, "grad_norm": 7.560672697460262, "learning_rate": 6.942849655598312e-06, "loss": 17.9669, "step": 21434 }, { "epoch": 0.39181457583124646, "grad_norm": 5.195320813019755, "learning_rate": 6.942576901468501e-06, "loss": 17.116, "step": 21435 }, { "epoch": 0.391832855027693, "grad_norm": 5.082459973250335, "learning_rate": 6.942304140530125e-06, "loss": 16.927, "step": 21436 }, { "epoch": 0.39185113422413953, "grad_norm": 5.687533246753722, "learning_rate": 6.942031372784143e-06, "loss": 17.1209, "step": 21437 }, { "epoch": 0.391869413420586, "grad_norm": 6.92106101088448, "learning_rate": 6.94175859823151e-06, "loss": 17.5381, "step": 21438 }, { "epoch": 0.39188769261703255, "grad_norm": 7.247306842169459, "learning_rate": 6.9414858168731814e-06, "loss": 17.9658, "step": 21439 }, { "epoch": 0.3919059718134791, "grad_norm": 6.322222866411631, "learning_rate": 6.941213028710113e-06, "loss": 17.4772, "step": 21440 }, { "epoch": 0.3919242510099256, "grad_norm": 6.506007597978849, "learning_rate": 6.940940233743262e-06, "loss": 17.5034, "step": 21441 }, { "epoch": 0.39194253020637215, "grad_norm": 6.427767140365889, "learning_rate": 6.9406674319735835e-06, "loss": 17.938, "step": 21442 }, { "epoch": 0.39196080940281863, "grad_norm": 8.099055649396236, "learning_rate": 6.940394623402033e-06, "loss": 17.9091, "step": 21443 }, { "epoch": 0.39197908859926517, "grad_norm": 5.871450044502728, "learning_rate": 6.940121808029569e-06, "loss": 17.2447, "step": 21444 }, { "epoch": 0.3919973677957117, "grad_norm": 6.802340027753673, "learning_rate": 6.9398489858571475e-06, "loss": 17.7163, "step": 21445 }, { "epoch": 0.39201564699215824, "grad_norm": 6.690952033889844, "learning_rate": 6.939576156885722e-06, "loss": 17.4323, "step": 21446 }, { "epoch": 0.3920339261886048, "grad_norm": 6.167617635270627, "learning_rate": 6.9393033211162505e-06, "loss": 17.3473, "step": 21447 }, { "epoch": 0.39205220538505126, "grad_norm": 5.846650263525677, "learning_rate": 6.939030478549691e-06, "loss": 17.3274, "step": 21448 }, { "epoch": 0.3920704845814978, "grad_norm": 5.950837418407083, "learning_rate": 6.938757629186996e-06, "loss": 17.0843, "step": 21449 }, { "epoch": 0.3920887637779443, "grad_norm": 5.836430906477098, "learning_rate": 6.938484773029123e-06, "loss": 17.4089, "step": 21450 }, { "epoch": 0.39210704297439086, "grad_norm": 6.608261446021528, "learning_rate": 6.938211910077031e-06, "loss": 17.6756, "step": 21451 }, { "epoch": 0.39212532217083734, "grad_norm": 6.531429575224675, "learning_rate": 6.937939040331674e-06, "loss": 17.2479, "step": 21452 }, { "epoch": 0.3921436013672839, "grad_norm": 6.041658607655284, "learning_rate": 6.937666163794008e-06, "loss": 17.2461, "step": 21453 }, { "epoch": 0.3921618805637304, "grad_norm": 9.557082172843252, "learning_rate": 6.9373932804649915e-06, "loss": 18.5878, "step": 21454 }, { "epoch": 0.39218015976017695, "grad_norm": 6.205925750888624, "learning_rate": 6.937120390345579e-06, "loss": 17.3433, "step": 21455 }, { "epoch": 0.3921984389566235, "grad_norm": 5.817668641710071, "learning_rate": 6.936847493436727e-06, "loss": 17.1638, "step": 21456 }, { "epoch": 0.39221671815306997, "grad_norm": 5.751541819348251, "learning_rate": 6.9365745897393935e-06, "loss": 17.2232, "step": 21457 }, { "epoch": 0.3922349973495165, "grad_norm": 5.882162509912774, "learning_rate": 6.936301679254533e-06, "loss": 16.9619, "step": 21458 }, { "epoch": 0.39225327654596304, "grad_norm": 6.3118147297515215, "learning_rate": 6.9360287619831035e-06, "loss": 17.4475, "step": 21459 }, { "epoch": 0.3922715557424096, "grad_norm": 5.410386490362895, "learning_rate": 6.9357558379260615e-06, "loss": 16.971, "step": 21460 }, { "epoch": 0.3922898349388561, "grad_norm": 5.477369075142672, "learning_rate": 6.9354829070843635e-06, "loss": 17.1283, "step": 21461 }, { "epoch": 0.3923081141353026, "grad_norm": 6.588263923847018, "learning_rate": 6.935209969458967e-06, "loss": 17.66, "step": 21462 }, { "epoch": 0.3923263933317491, "grad_norm": 6.770909018164611, "learning_rate": 6.934937025050826e-06, "loss": 17.7483, "step": 21463 }, { "epoch": 0.39234467252819566, "grad_norm": 6.5732233956474415, "learning_rate": 6.9346640738608975e-06, "loss": 17.818, "step": 21464 }, { "epoch": 0.3923629517246422, "grad_norm": 6.378638997720948, "learning_rate": 6.934391115890142e-06, "loss": 17.5235, "step": 21465 }, { "epoch": 0.39238123092108873, "grad_norm": 5.609027096527029, "learning_rate": 6.934118151139512e-06, "loss": 17.1316, "step": 21466 }, { "epoch": 0.3923995101175352, "grad_norm": 8.59040276509835, "learning_rate": 6.933845179609966e-06, "loss": 18.5035, "step": 21467 }, { "epoch": 0.39241778931398175, "grad_norm": 6.018861877900512, "learning_rate": 6.933572201302459e-06, "loss": 17.3577, "step": 21468 }, { "epoch": 0.3924360685104283, "grad_norm": 7.895633672450876, "learning_rate": 6.933299216217952e-06, "loss": 18.2459, "step": 21469 }, { "epoch": 0.3924543477068748, "grad_norm": 7.084441441369362, "learning_rate": 6.933026224357397e-06, "loss": 17.7863, "step": 21470 }, { "epoch": 0.39247262690332135, "grad_norm": 5.888197528605958, "learning_rate": 6.932753225721753e-06, "loss": 17.2933, "step": 21471 }, { "epoch": 0.39249090609976783, "grad_norm": 5.328045823701662, "learning_rate": 6.9324802203119766e-06, "loss": 17.3001, "step": 21472 }, { "epoch": 0.39250918529621437, "grad_norm": 5.873985335509079, "learning_rate": 6.9322072081290245e-06, "loss": 17.1989, "step": 21473 }, { "epoch": 0.3925274644926609, "grad_norm": 6.382507901772117, "learning_rate": 6.931934189173854e-06, "loss": 17.2451, "step": 21474 }, { "epoch": 0.39254574368910744, "grad_norm": 6.416150137299243, "learning_rate": 6.931661163447423e-06, "loss": 17.5899, "step": 21475 }, { "epoch": 0.392564022885554, "grad_norm": 5.733098397335905, "learning_rate": 6.931388130950688e-06, "loss": 17.3838, "step": 21476 }, { "epoch": 0.39258230208200046, "grad_norm": 5.390450152159793, "learning_rate": 6.931115091684603e-06, "loss": 17.1822, "step": 21477 }, { "epoch": 0.392600581278447, "grad_norm": 7.565431469379208, "learning_rate": 6.930842045650127e-06, "loss": 17.9168, "step": 21478 }, { "epoch": 0.3926188604748935, "grad_norm": 6.791175985526804, "learning_rate": 6.930568992848219e-06, "loss": 17.561, "step": 21479 }, { "epoch": 0.39263713967134006, "grad_norm": 6.845110080529336, "learning_rate": 6.930295933279833e-06, "loss": 17.6659, "step": 21480 }, { "epoch": 0.3926554188677866, "grad_norm": 5.9062726952214835, "learning_rate": 6.930022866945928e-06, "loss": 17.3578, "step": 21481 }, { "epoch": 0.3926736980642331, "grad_norm": 9.224183488796932, "learning_rate": 6.929749793847459e-06, "loss": 17.6501, "step": 21482 }, { "epoch": 0.3926919772606796, "grad_norm": 6.297355400920313, "learning_rate": 6.929476713985386e-06, "loss": 17.4091, "step": 21483 }, { "epoch": 0.39271025645712615, "grad_norm": 6.22445551869341, "learning_rate": 6.9292036273606635e-06, "loss": 17.293, "step": 21484 }, { "epoch": 0.3927285356535727, "grad_norm": 6.79711374166879, "learning_rate": 6.92893053397425e-06, "loss": 17.6671, "step": 21485 }, { "epoch": 0.39274681485001917, "grad_norm": 7.058928683656009, "learning_rate": 6.928657433827102e-06, "loss": 17.8986, "step": 21486 }, { "epoch": 0.3927650940464657, "grad_norm": 5.999522263824738, "learning_rate": 6.928384326920178e-06, "loss": 17.5663, "step": 21487 }, { "epoch": 0.39278337324291224, "grad_norm": 5.60797011067639, "learning_rate": 6.928111213254434e-06, "loss": 17.1847, "step": 21488 }, { "epoch": 0.3928016524393588, "grad_norm": 6.880346185719323, "learning_rate": 6.927838092830826e-06, "loss": 17.7063, "step": 21489 }, { "epoch": 0.3928199316358053, "grad_norm": 6.67807673595115, "learning_rate": 6.927564965650315e-06, "loss": 17.7538, "step": 21490 }, { "epoch": 0.3928382108322518, "grad_norm": 6.885612102038492, "learning_rate": 6.927291831713855e-06, "loss": 17.6307, "step": 21491 }, { "epoch": 0.3928564900286983, "grad_norm": 5.380259452494266, "learning_rate": 6.927018691022403e-06, "loss": 17.0805, "step": 21492 }, { "epoch": 0.39287476922514486, "grad_norm": 5.43083297474748, "learning_rate": 6.926745543576921e-06, "loss": 17.1407, "step": 21493 }, { "epoch": 0.3928930484215914, "grad_norm": 7.756199643110793, "learning_rate": 6.926472389378361e-06, "loss": 17.8113, "step": 21494 }, { "epoch": 0.39291132761803793, "grad_norm": 10.660610872866012, "learning_rate": 6.926199228427681e-06, "loss": 17.8452, "step": 21495 }, { "epoch": 0.3929296068144844, "grad_norm": 8.26578635522386, "learning_rate": 6.925926060725843e-06, "loss": 18.2691, "step": 21496 }, { "epoch": 0.39294788601093095, "grad_norm": 6.998510645103447, "learning_rate": 6.925652886273799e-06, "loss": 17.9226, "step": 21497 }, { "epoch": 0.3929661652073775, "grad_norm": 6.001831030454143, "learning_rate": 6.9253797050725104e-06, "loss": 17.4689, "step": 21498 }, { "epoch": 0.392984444403824, "grad_norm": 7.423516301738987, "learning_rate": 6.925106517122932e-06, "loss": 18.0992, "step": 21499 }, { "epoch": 0.39300272360027055, "grad_norm": 6.370552452597878, "learning_rate": 6.9248333224260226e-06, "loss": 17.4909, "step": 21500 }, { "epoch": 0.39302100279671703, "grad_norm": 5.862008907473447, "learning_rate": 6.924560120982739e-06, "loss": 17.4139, "step": 21501 }, { "epoch": 0.39303928199316357, "grad_norm": 5.935590540687427, "learning_rate": 6.924286912794039e-06, "loss": 17.1046, "step": 21502 }, { "epoch": 0.3930575611896101, "grad_norm": 6.926847413856296, "learning_rate": 6.924013697860882e-06, "loss": 17.7685, "step": 21503 }, { "epoch": 0.39307584038605664, "grad_norm": 5.970307707596561, "learning_rate": 6.923740476184222e-06, "loss": 17.6556, "step": 21504 }, { "epoch": 0.3930941195825032, "grad_norm": 6.713674199500309, "learning_rate": 6.923467247765019e-06, "loss": 17.6955, "step": 21505 }, { "epoch": 0.39311239877894966, "grad_norm": 5.577756592376602, "learning_rate": 6.923194012604231e-06, "loss": 17.3995, "step": 21506 }, { "epoch": 0.3931306779753962, "grad_norm": 5.9016469407663825, "learning_rate": 6.9229207707028146e-06, "loss": 17.5544, "step": 21507 }, { "epoch": 0.39314895717184273, "grad_norm": 6.434933392640182, "learning_rate": 6.922647522061727e-06, "loss": 17.6404, "step": 21508 }, { "epoch": 0.39316723636828926, "grad_norm": 6.4413342933589925, "learning_rate": 6.922374266681927e-06, "loss": 17.9014, "step": 21509 }, { "epoch": 0.3931855155647358, "grad_norm": 6.065567085535461, "learning_rate": 6.922101004564373e-06, "loss": 17.1599, "step": 21510 }, { "epoch": 0.3932037947611823, "grad_norm": 7.44199599360796, "learning_rate": 6.92182773571002e-06, "loss": 17.7583, "step": 21511 }, { "epoch": 0.3932220739576288, "grad_norm": 7.304037294401231, "learning_rate": 6.92155446011983e-06, "loss": 17.6521, "step": 21512 }, { "epoch": 0.39324035315407535, "grad_norm": 6.205163333011176, "learning_rate": 6.9212811777947565e-06, "loss": 17.4399, "step": 21513 }, { "epoch": 0.3932586323505219, "grad_norm": 6.609599701956432, "learning_rate": 6.92100788873576e-06, "loss": 17.4695, "step": 21514 }, { "epoch": 0.3932769115469684, "grad_norm": 6.584113271674233, "learning_rate": 6.920734592943796e-06, "loss": 17.5762, "step": 21515 }, { "epoch": 0.3932951907434149, "grad_norm": 6.800658262301761, "learning_rate": 6.920461290419825e-06, "loss": 17.6977, "step": 21516 }, { "epoch": 0.39331346993986144, "grad_norm": 6.457193982312407, "learning_rate": 6.920187981164804e-06, "loss": 17.6557, "step": 21517 }, { "epoch": 0.393331749136308, "grad_norm": 7.853754503596599, "learning_rate": 6.919914665179691e-06, "loss": 18.1926, "step": 21518 }, { "epoch": 0.3933500283327545, "grad_norm": 6.6955447511823225, "learning_rate": 6.919641342465444e-06, "loss": 17.4948, "step": 21519 }, { "epoch": 0.393368307529201, "grad_norm": 5.966810488350211, "learning_rate": 6.91936801302302e-06, "loss": 17.3947, "step": 21520 }, { "epoch": 0.3933865867256475, "grad_norm": 6.077602715754855, "learning_rate": 6.919094676853378e-06, "loss": 17.0745, "step": 21521 }, { "epoch": 0.39340486592209406, "grad_norm": 5.584685044400404, "learning_rate": 6.918821333957475e-06, "loss": 17.0942, "step": 21522 }, { "epoch": 0.3934231451185406, "grad_norm": 7.001524006806588, "learning_rate": 6.91854798433627e-06, "loss": 17.9249, "step": 21523 }, { "epoch": 0.39344142431498713, "grad_norm": 6.599745661110747, "learning_rate": 6.918274627990722e-06, "loss": 17.7185, "step": 21524 }, { "epoch": 0.3934597035114336, "grad_norm": 8.165704272755427, "learning_rate": 6.918001264921786e-06, "loss": 18.4602, "step": 21525 }, { "epoch": 0.39347798270788015, "grad_norm": 6.1992574624140095, "learning_rate": 6.917727895130423e-06, "loss": 17.4023, "step": 21526 }, { "epoch": 0.3934962619043267, "grad_norm": 7.073284774296896, "learning_rate": 6.917454518617589e-06, "loss": 17.572, "step": 21527 }, { "epoch": 0.3935145411007732, "grad_norm": 7.292081190745441, "learning_rate": 6.917181135384246e-06, "loss": 17.8471, "step": 21528 }, { "epoch": 0.39353282029721975, "grad_norm": 7.072267248965862, "learning_rate": 6.9169077454313475e-06, "loss": 17.8776, "step": 21529 }, { "epoch": 0.39355109949366623, "grad_norm": 5.322622559874622, "learning_rate": 6.916634348759853e-06, "loss": 17.1781, "step": 21530 }, { "epoch": 0.39356937869011277, "grad_norm": 7.077310316001486, "learning_rate": 6.916360945370722e-06, "loss": 17.94, "step": 21531 }, { "epoch": 0.3935876578865593, "grad_norm": 6.440467751651259, "learning_rate": 6.916087535264913e-06, "loss": 17.45, "step": 21532 }, { "epoch": 0.39360593708300584, "grad_norm": 7.40705158499139, "learning_rate": 6.915814118443383e-06, "loss": 17.6429, "step": 21533 }, { "epoch": 0.3936242162794524, "grad_norm": 5.276780521040269, "learning_rate": 6.915540694907092e-06, "loss": 16.977, "step": 21534 }, { "epoch": 0.39364249547589886, "grad_norm": 7.254487865106271, "learning_rate": 6.9152672646569955e-06, "loss": 17.8337, "step": 21535 }, { "epoch": 0.3936607746723454, "grad_norm": 5.914857845336416, "learning_rate": 6.914993827694053e-06, "loss": 17.2768, "step": 21536 }, { "epoch": 0.39367905386879193, "grad_norm": 5.750367364362483, "learning_rate": 6.9147203840192246e-06, "loss": 17.3195, "step": 21537 }, { "epoch": 0.39369733306523846, "grad_norm": 6.5440410320419105, "learning_rate": 6.914446933633467e-06, "loss": 17.4559, "step": 21538 }, { "epoch": 0.393715612261685, "grad_norm": 5.75955858533619, "learning_rate": 6.914173476537739e-06, "loss": 17.282, "step": 21539 }, { "epoch": 0.3937338914581315, "grad_norm": 7.3803077544723585, "learning_rate": 6.913900012732999e-06, "loss": 17.4063, "step": 21540 }, { "epoch": 0.393752170654578, "grad_norm": 6.739488790118494, "learning_rate": 6.913626542220205e-06, "loss": 17.7722, "step": 21541 }, { "epoch": 0.39377044985102455, "grad_norm": 5.871982181584669, "learning_rate": 6.913353065000317e-06, "loss": 16.9883, "step": 21542 }, { "epoch": 0.3937887290474711, "grad_norm": 6.849382124331771, "learning_rate": 6.913079581074293e-06, "loss": 17.9869, "step": 21543 }, { "epoch": 0.3938070082439176, "grad_norm": 5.594692598085732, "learning_rate": 6.91280609044309e-06, "loss": 17.0854, "step": 21544 }, { "epoch": 0.3938252874403641, "grad_norm": 5.758206562655694, "learning_rate": 6.912532593107667e-06, "loss": 17.2974, "step": 21545 }, { "epoch": 0.39384356663681064, "grad_norm": 5.677117188934752, "learning_rate": 6.912259089068984e-06, "loss": 17.1663, "step": 21546 }, { "epoch": 0.3938618458332572, "grad_norm": 6.861772832897799, "learning_rate": 6.911985578327999e-06, "loss": 17.7252, "step": 21547 }, { "epoch": 0.3938801250297037, "grad_norm": 7.841222096859772, "learning_rate": 6.911712060885672e-06, "loss": 17.629, "step": 21548 }, { "epoch": 0.39389840422615024, "grad_norm": 7.624450085180011, "learning_rate": 6.9114385367429585e-06, "loss": 18.033, "step": 21549 }, { "epoch": 0.3939166834225967, "grad_norm": 7.868388483174876, "learning_rate": 6.911165005900817e-06, "loss": 18.2352, "step": 21550 }, { "epoch": 0.39393496261904326, "grad_norm": 7.593997323363449, "learning_rate": 6.91089146836021e-06, "loss": 18.1603, "step": 21551 }, { "epoch": 0.3939532418154898, "grad_norm": 9.515579700243576, "learning_rate": 6.910617924122094e-06, "loss": 17.5219, "step": 21552 }, { "epoch": 0.39397152101193633, "grad_norm": 6.462431585095287, "learning_rate": 6.9103443731874286e-06, "loss": 17.7226, "step": 21553 }, { "epoch": 0.3939898002083828, "grad_norm": 6.071070491291092, "learning_rate": 6.9100708155571705e-06, "loss": 17.3667, "step": 21554 }, { "epoch": 0.39400807940482935, "grad_norm": 7.148145689259566, "learning_rate": 6.909797251232282e-06, "loss": 17.588, "step": 21555 }, { "epoch": 0.3940263586012759, "grad_norm": 6.939347550435989, "learning_rate": 6.9095236802137174e-06, "loss": 17.5267, "step": 21556 }, { "epoch": 0.3940446377977224, "grad_norm": 5.705199918354399, "learning_rate": 6.909250102502439e-06, "loss": 17.3792, "step": 21557 }, { "epoch": 0.39406291699416895, "grad_norm": 7.28107304514705, "learning_rate": 6.908976518099405e-06, "loss": 18.0734, "step": 21558 }, { "epoch": 0.39408119619061543, "grad_norm": 8.205423068008912, "learning_rate": 6.908702927005574e-06, "loss": 18.3236, "step": 21559 }, { "epoch": 0.39409947538706197, "grad_norm": 6.7778303874948715, "learning_rate": 6.9084293292219055e-06, "loss": 17.8698, "step": 21560 }, { "epoch": 0.3941177545835085, "grad_norm": 5.884320065261258, "learning_rate": 6.908155724749357e-06, "loss": 17.2822, "step": 21561 }, { "epoch": 0.39413603377995504, "grad_norm": 6.104454965401443, "learning_rate": 6.907882113588889e-06, "loss": 17.4723, "step": 21562 }, { "epoch": 0.3941543129764016, "grad_norm": 5.783308774319683, "learning_rate": 6.907608495741458e-06, "loss": 17.4832, "step": 21563 }, { "epoch": 0.39417259217284806, "grad_norm": 6.2913806012946605, "learning_rate": 6.907334871208024e-06, "loss": 17.5201, "step": 21564 }, { "epoch": 0.3941908713692946, "grad_norm": 5.327093358403174, "learning_rate": 6.907061239989551e-06, "loss": 16.931, "step": 21565 }, { "epoch": 0.39420915056574113, "grad_norm": 5.1719985979508865, "learning_rate": 6.9067876020869905e-06, "loss": 17.1255, "step": 21566 }, { "epoch": 0.39422742976218766, "grad_norm": 6.54416253618085, "learning_rate": 6.906513957501306e-06, "loss": 17.6631, "step": 21567 }, { "epoch": 0.3942457089586342, "grad_norm": 6.253670207973528, "learning_rate": 6.9062403062334545e-06, "loss": 17.1095, "step": 21568 }, { "epoch": 0.3942639881550807, "grad_norm": 7.244370332448828, "learning_rate": 6.905966648284398e-06, "loss": 17.801, "step": 21569 }, { "epoch": 0.3942822673515272, "grad_norm": 6.038511238306077, "learning_rate": 6.905692983655092e-06, "loss": 17.4237, "step": 21570 }, { "epoch": 0.39430054654797375, "grad_norm": 7.0794140402264265, "learning_rate": 6.905419312346499e-06, "loss": 17.909, "step": 21571 }, { "epoch": 0.3943188257444203, "grad_norm": 6.083436443068745, "learning_rate": 6.905145634359576e-06, "loss": 17.4752, "step": 21572 }, { "epoch": 0.3943371049408668, "grad_norm": 7.731608014735897, "learning_rate": 6.904871949695282e-06, "loss": 18.1243, "step": 21573 }, { "epoch": 0.3943553841373133, "grad_norm": 6.637765956525025, "learning_rate": 6.9045982583545775e-06, "loss": 17.7536, "step": 21574 }, { "epoch": 0.39437366333375984, "grad_norm": 6.532880660508335, "learning_rate": 6.904324560338422e-06, "loss": 17.4753, "step": 21575 }, { "epoch": 0.3943919425302064, "grad_norm": 6.107409359039475, "learning_rate": 6.904050855647775e-06, "loss": 17.5301, "step": 21576 }, { "epoch": 0.3944102217266529, "grad_norm": 5.78441930579743, "learning_rate": 6.903777144283593e-06, "loss": 17.3339, "step": 21577 }, { "epoch": 0.39442850092309945, "grad_norm": 5.36158213128983, "learning_rate": 6.9035034262468385e-06, "loss": 17.2054, "step": 21578 }, { "epoch": 0.3944467801195459, "grad_norm": 7.98591769113503, "learning_rate": 6.903229701538469e-06, "loss": 17.8269, "step": 21579 }, { "epoch": 0.39446505931599246, "grad_norm": 6.972481701306595, "learning_rate": 6.902955970159446e-06, "loss": 17.9411, "step": 21580 }, { "epoch": 0.394483338512439, "grad_norm": 7.447622898864084, "learning_rate": 6.902682232110727e-06, "loss": 17.9744, "step": 21581 }, { "epoch": 0.39450161770888553, "grad_norm": 6.104928288760058, "learning_rate": 6.90240848739327e-06, "loss": 17.5479, "step": 21582 }, { "epoch": 0.39451989690533207, "grad_norm": 6.141051315020905, "learning_rate": 6.9021347360080385e-06, "loss": 17.4445, "step": 21583 }, { "epoch": 0.39453817610177855, "grad_norm": 5.794997374883734, "learning_rate": 6.901860977955989e-06, "loss": 17.3963, "step": 21584 }, { "epoch": 0.3945564552982251, "grad_norm": 6.731518293898058, "learning_rate": 6.901587213238081e-06, "loss": 17.715, "step": 21585 }, { "epoch": 0.3945747344946716, "grad_norm": 7.191380738474616, "learning_rate": 6.901313441855275e-06, "loss": 17.5182, "step": 21586 }, { "epoch": 0.39459301369111816, "grad_norm": 6.365568178045943, "learning_rate": 6.901039663808531e-06, "loss": 17.5003, "step": 21587 }, { "epoch": 0.39461129288756464, "grad_norm": 7.978823305473955, "learning_rate": 6.9007658790988086e-06, "loss": 17.7458, "step": 21588 }, { "epoch": 0.39462957208401117, "grad_norm": 6.411446773921178, "learning_rate": 6.900492087727065e-06, "loss": 17.4705, "step": 21589 }, { "epoch": 0.3946478512804577, "grad_norm": 7.511496973229927, "learning_rate": 6.900218289694262e-06, "loss": 18.0328, "step": 21590 }, { "epoch": 0.39466613047690424, "grad_norm": 6.437665494260655, "learning_rate": 6.8999444850013604e-06, "loss": 17.6575, "step": 21591 }, { "epoch": 0.3946844096733508, "grad_norm": 9.144353950375423, "learning_rate": 6.899670673649317e-06, "loss": 18.3534, "step": 21592 }, { "epoch": 0.39470268886979726, "grad_norm": 6.14218081702421, "learning_rate": 6.8993968556390945e-06, "loss": 17.2321, "step": 21593 }, { "epoch": 0.3947209680662438, "grad_norm": 5.523745245072925, "learning_rate": 6.899123030971648e-06, "loss": 16.9665, "step": 21594 }, { "epoch": 0.39473924726269033, "grad_norm": 5.152206424640226, "learning_rate": 6.8988491996479414e-06, "loss": 17.0409, "step": 21595 }, { "epoch": 0.39475752645913686, "grad_norm": 6.574388933840838, "learning_rate": 6.8985753616689335e-06, "loss": 17.6122, "step": 21596 }, { "epoch": 0.3947758056555834, "grad_norm": 5.76128631986071, "learning_rate": 6.898301517035584e-06, "loss": 17.3483, "step": 21597 }, { "epoch": 0.3947940848520299, "grad_norm": 6.052356447756394, "learning_rate": 6.8980276657488505e-06, "loss": 17.416, "step": 21598 }, { "epoch": 0.3948123640484764, "grad_norm": 6.493268264946741, "learning_rate": 6.897753807809696e-06, "loss": 17.8602, "step": 21599 }, { "epoch": 0.39483064324492295, "grad_norm": 6.095529945679339, "learning_rate": 6.897479943219079e-06, "loss": 17.2012, "step": 21600 }, { "epoch": 0.3948489224413695, "grad_norm": 6.111414826188851, "learning_rate": 6.89720607197796e-06, "loss": 17.449, "step": 21601 }, { "epoch": 0.394867201637816, "grad_norm": 7.318563016598272, "learning_rate": 6.896932194087298e-06, "loss": 17.6215, "step": 21602 }, { "epoch": 0.3948854808342625, "grad_norm": 6.858298435815988, "learning_rate": 6.896658309548053e-06, "loss": 17.624, "step": 21603 }, { "epoch": 0.39490376003070904, "grad_norm": 6.4539344725539065, "learning_rate": 6.896384418361185e-06, "loss": 17.7222, "step": 21604 }, { "epoch": 0.3949220392271556, "grad_norm": 6.983623354502389, "learning_rate": 6.896110520527655e-06, "loss": 17.5526, "step": 21605 }, { "epoch": 0.3949403184236021, "grad_norm": 8.105170382444221, "learning_rate": 6.8958366160484214e-06, "loss": 18.1537, "step": 21606 }, { "epoch": 0.39495859762004865, "grad_norm": 6.996364276421232, "learning_rate": 6.895562704924446e-06, "loss": 18.1652, "step": 21607 }, { "epoch": 0.3949768768164951, "grad_norm": 6.259866267692265, "learning_rate": 6.895288787156687e-06, "loss": 17.491, "step": 21608 }, { "epoch": 0.39499515601294166, "grad_norm": 5.608185112770575, "learning_rate": 6.895014862746103e-06, "loss": 17.0852, "step": 21609 }, { "epoch": 0.3950134352093882, "grad_norm": 7.386344062264315, "learning_rate": 6.89474093169366e-06, "loss": 17.7781, "step": 21610 }, { "epoch": 0.39503171440583473, "grad_norm": 7.345399211896186, "learning_rate": 6.894466994000313e-06, "loss": 18.0912, "step": 21611 }, { "epoch": 0.39504999360228127, "grad_norm": 5.4158628072361, "learning_rate": 6.894193049667024e-06, "loss": 17.2701, "step": 21612 }, { "epoch": 0.39506827279872775, "grad_norm": 5.333877593585367, "learning_rate": 6.893919098694752e-06, "loss": 17.0123, "step": 21613 }, { "epoch": 0.3950865519951743, "grad_norm": 8.665147609340739, "learning_rate": 6.893645141084458e-06, "loss": 18.3743, "step": 21614 }, { "epoch": 0.3951048311916208, "grad_norm": 7.752494504262045, "learning_rate": 6.893371176837103e-06, "loss": 18.1383, "step": 21615 }, { "epoch": 0.39512311038806736, "grad_norm": 5.804033550700176, "learning_rate": 6.893097205953645e-06, "loss": 17.3385, "step": 21616 }, { "epoch": 0.3951413895845139, "grad_norm": 5.224038404367742, "learning_rate": 6.8928232284350474e-06, "loss": 16.9664, "step": 21617 }, { "epoch": 0.39515966878096037, "grad_norm": 6.568917726489629, "learning_rate": 6.892549244282267e-06, "loss": 17.9612, "step": 21618 }, { "epoch": 0.3951779479774069, "grad_norm": 5.9351522721466585, "learning_rate": 6.8922752534962656e-06, "loss": 17.5031, "step": 21619 }, { "epoch": 0.39519622717385344, "grad_norm": 6.6544735398237975, "learning_rate": 6.892001256078005e-06, "loss": 17.6356, "step": 21620 }, { "epoch": 0.3952145063703, "grad_norm": 7.363304186122785, "learning_rate": 6.891727252028444e-06, "loss": 17.6523, "step": 21621 }, { "epoch": 0.39523278556674646, "grad_norm": 6.160753133854578, "learning_rate": 6.8914532413485415e-06, "loss": 17.2715, "step": 21622 }, { "epoch": 0.395251064763193, "grad_norm": 7.278316791103219, "learning_rate": 6.89117922403926e-06, "loss": 17.9876, "step": 21623 }, { "epoch": 0.39526934395963953, "grad_norm": 6.2495401952620275, "learning_rate": 6.8909052001015616e-06, "loss": 17.3047, "step": 21624 }, { "epoch": 0.39528762315608607, "grad_norm": 8.317036432905699, "learning_rate": 6.8906311695364035e-06, "loss": 18.1389, "step": 21625 }, { "epoch": 0.3953059023525326, "grad_norm": 6.982178598956792, "learning_rate": 6.890357132344746e-06, "loss": 17.6884, "step": 21626 }, { "epoch": 0.3953241815489791, "grad_norm": 7.886219479599872, "learning_rate": 6.890083088527551e-06, "loss": 17.9141, "step": 21627 }, { "epoch": 0.3953424607454256, "grad_norm": 6.750399332128435, "learning_rate": 6.8898090380857795e-06, "loss": 17.7167, "step": 21628 }, { "epoch": 0.39536073994187215, "grad_norm": 5.841837478245158, "learning_rate": 6.889534981020392e-06, "loss": 17.3937, "step": 21629 }, { "epoch": 0.3953790191383187, "grad_norm": 5.332917163119414, "learning_rate": 6.889260917332347e-06, "loss": 17.2659, "step": 21630 }, { "epoch": 0.3953972983347652, "grad_norm": 6.827078039101366, "learning_rate": 6.888986847022607e-06, "loss": 17.7501, "step": 21631 }, { "epoch": 0.3954155775312117, "grad_norm": 6.815880046059335, "learning_rate": 6.888712770092132e-06, "loss": 17.9065, "step": 21632 }, { "epoch": 0.39543385672765824, "grad_norm": 5.806367709665783, "learning_rate": 6.8884386865418825e-06, "loss": 17.2579, "step": 21633 }, { "epoch": 0.3954521359241048, "grad_norm": 6.373527468823904, "learning_rate": 6.888164596372819e-06, "loss": 17.1608, "step": 21634 }, { "epoch": 0.3954704151205513, "grad_norm": 7.91456275985367, "learning_rate": 6.887890499585903e-06, "loss": 18.3949, "step": 21635 }, { "epoch": 0.39548869431699785, "grad_norm": 6.545420292366348, "learning_rate": 6.887616396182094e-06, "loss": 17.4711, "step": 21636 }, { "epoch": 0.3955069735134443, "grad_norm": 6.150967724446916, "learning_rate": 6.887342286162354e-06, "loss": 17.3862, "step": 21637 }, { "epoch": 0.39552525270989086, "grad_norm": 6.086142113223638, "learning_rate": 6.887068169527643e-06, "loss": 17.2927, "step": 21638 }, { "epoch": 0.3955435319063374, "grad_norm": 6.752372891295724, "learning_rate": 6.886794046278922e-06, "loss": 17.7975, "step": 21639 }, { "epoch": 0.39556181110278393, "grad_norm": 5.734532996593663, "learning_rate": 6.886519916417152e-06, "loss": 17.3136, "step": 21640 }, { "epoch": 0.39558009029923047, "grad_norm": 6.179620272208543, "learning_rate": 6.886245779943291e-06, "loss": 17.3563, "step": 21641 }, { "epoch": 0.39559836949567695, "grad_norm": 6.746739964031116, "learning_rate": 6.885971636858304e-06, "loss": 17.4968, "step": 21642 }, { "epoch": 0.3956166486921235, "grad_norm": 5.582131068210862, "learning_rate": 6.88569748716315e-06, "loss": 17.0617, "step": 21643 }, { "epoch": 0.39563492788857, "grad_norm": 7.099942387867566, "learning_rate": 6.8854233308587905e-06, "loss": 17.7158, "step": 21644 }, { "epoch": 0.39565320708501656, "grad_norm": 7.8912448083419, "learning_rate": 6.885149167946185e-06, "loss": 18.0542, "step": 21645 }, { "epoch": 0.3956714862814631, "grad_norm": 5.404480522763143, "learning_rate": 6.884874998426296e-06, "loss": 16.9571, "step": 21646 }, { "epoch": 0.39568976547790957, "grad_norm": 6.247336627791287, "learning_rate": 6.8846008223000825e-06, "loss": 17.5024, "step": 21647 }, { "epoch": 0.3957080446743561, "grad_norm": 7.941220192674957, "learning_rate": 6.884326639568508e-06, "loss": 17.8973, "step": 21648 }, { "epoch": 0.39572632387080264, "grad_norm": 6.379053972758212, "learning_rate": 6.88405245023253e-06, "loss": 17.4956, "step": 21649 }, { "epoch": 0.3957446030672492, "grad_norm": 8.387081659392715, "learning_rate": 6.883778254293113e-06, "loss": 18.1086, "step": 21650 }, { "epoch": 0.3957628822636957, "grad_norm": 7.3948425362543375, "learning_rate": 6.883504051751217e-06, "loss": 17.8278, "step": 21651 }, { "epoch": 0.3957811614601422, "grad_norm": 8.063472087585946, "learning_rate": 6.883229842607804e-06, "loss": 18.4844, "step": 21652 }, { "epoch": 0.39579944065658873, "grad_norm": 6.357610352214988, "learning_rate": 6.882955626863832e-06, "loss": 17.4236, "step": 21653 }, { "epoch": 0.39581771985303527, "grad_norm": 5.873895105905679, "learning_rate": 6.882681404520263e-06, "loss": 17.1778, "step": 21654 }, { "epoch": 0.3958359990494818, "grad_norm": 7.428820280037442, "learning_rate": 6.882407175578061e-06, "loss": 18.0671, "step": 21655 }, { "epoch": 0.3958542782459283, "grad_norm": 7.706670152117786, "learning_rate": 6.8821329400381844e-06, "loss": 17.9579, "step": 21656 }, { "epoch": 0.3958725574423748, "grad_norm": 7.5884015572154855, "learning_rate": 6.881858697901596e-06, "loss": 17.7861, "step": 21657 }, { "epoch": 0.39589083663882135, "grad_norm": 7.288277868186172, "learning_rate": 6.881584449169254e-06, "loss": 18.0603, "step": 21658 }, { "epoch": 0.3959091158352679, "grad_norm": 5.498976681355198, "learning_rate": 6.881310193842123e-06, "loss": 17.179, "step": 21659 }, { "epoch": 0.3959273950317144, "grad_norm": 6.68694624285585, "learning_rate": 6.881035931921164e-06, "loss": 17.604, "step": 21660 }, { "epoch": 0.3959456742281609, "grad_norm": 5.453455096321566, "learning_rate": 6.880761663407336e-06, "loss": 17.2405, "step": 21661 }, { "epoch": 0.39596395342460744, "grad_norm": 6.035074401662799, "learning_rate": 6.880487388301603e-06, "loss": 17.5, "step": 21662 }, { "epoch": 0.395982232621054, "grad_norm": 9.092492814031782, "learning_rate": 6.880213106604923e-06, "loss": 17.4662, "step": 21663 }, { "epoch": 0.3960005118175005, "grad_norm": 5.664062192085136, "learning_rate": 6.87993881831826e-06, "loss": 17.2395, "step": 21664 }, { "epoch": 0.39601879101394705, "grad_norm": 7.052725853189261, "learning_rate": 6.879664523442575e-06, "loss": 17.7064, "step": 21665 }, { "epoch": 0.3960370702103935, "grad_norm": 7.638767286611265, "learning_rate": 6.87939022197883e-06, "loss": 18.2496, "step": 21666 }, { "epoch": 0.39605534940684006, "grad_norm": 5.9105272706982594, "learning_rate": 6.879115913927984e-06, "loss": 17.2275, "step": 21667 }, { "epoch": 0.3960736286032866, "grad_norm": 5.679793639716042, "learning_rate": 6.878841599290998e-06, "loss": 17.4539, "step": 21668 }, { "epoch": 0.39609190779973313, "grad_norm": 6.795324756701742, "learning_rate": 6.878567278068838e-06, "loss": 17.7713, "step": 21669 }, { "epoch": 0.39611018699617967, "grad_norm": 5.698620339964075, "learning_rate": 6.8782929502624615e-06, "loss": 17.2789, "step": 21670 }, { "epoch": 0.39612846619262615, "grad_norm": 6.147221977014783, "learning_rate": 6.878018615872832e-06, "loss": 17.3817, "step": 21671 }, { "epoch": 0.3961467453890727, "grad_norm": 6.531815043610964, "learning_rate": 6.877744274900911e-06, "loss": 17.4807, "step": 21672 }, { "epoch": 0.3961650245855192, "grad_norm": 6.115114026166975, "learning_rate": 6.8774699273476576e-06, "loss": 17.2749, "step": 21673 }, { "epoch": 0.39618330378196576, "grad_norm": 7.012860703434272, "learning_rate": 6.877195573214035e-06, "loss": 17.7193, "step": 21674 }, { "epoch": 0.3962015829784123, "grad_norm": 6.41914177435871, "learning_rate": 6.876921212501004e-06, "loss": 17.3767, "step": 21675 }, { "epoch": 0.3962198621748588, "grad_norm": 6.946961562007794, "learning_rate": 6.876646845209529e-06, "loss": 17.9227, "step": 21676 }, { "epoch": 0.3962381413713053, "grad_norm": 6.488254765236719, "learning_rate": 6.876372471340569e-06, "loss": 17.7707, "step": 21677 }, { "epoch": 0.39625642056775184, "grad_norm": 7.564070027824658, "learning_rate": 6.876098090895086e-06, "loss": 18.2453, "step": 21678 }, { "epoch": 0.3962746997641984, "grad_norm": 6.780838371917999, "learning_rate": 6.875823703874043e-06, "loss": 17.5928, "step": 21679 }, { "epoch": 0.3962929789606449, "grad_norm": 7.090326527895478, "learning_rate": 6.8755493102784e-06, "loss": 17.6829, "step": 21680 }, { "epoch": 0.3963112581570914, "grad_norm": 6.033272197405425, "learning_rate": 6.875274910109117e-06, "loss": 17.3839, "step": 21681 }, { "epoch": 0.39632953735353793, "grad_norm": 5.981024208762855, "learning_rate": 6.875000503367162e-06, "loss": 17.2835, "step": 21682 }, { "epoch": 0.39634781654998447, "grad_norm": 6.067834244801076, "learning_rate": 6.874726090053492e-06, "loss": 17.361, "step": 21683 }, { "epoch": 0.396366095746431, "grad_norm": 24.24281158832486, "learning_rate": 6.874451670169069e-06, "loss": 17.6624, "step": 21684 }, { "epoch": 0.39638437494287754, "grad_norm": 5.5553463901806985, "learning_rate": 6.874177243714856e-06, "loss": 17.2165, "step": 21685 }, { "epoch": 0.396402654139324, "grad_norm": 6.062006121777724, "learning_rate": 6.8739028106918135e-06, "loss": 17.4607, "step": 21686 }, { "epoch": 0.39642093333577055, "grad_norm": 7.17781354196199, "learning_rate": 6.873628371100906e-06, "loss": 18.1326, "step": 21687 }, { "epoch": 0.3964392125322171, "grad_norm": 7.301042531092424, "learning_rate": 6.873353924943091e-06, "loss": 17.8408, "step": 21688 }, { "epoch": 0.3964574917286636, "grad_norm": 7.357280041350993, "learning_rate": 6.873079472219335e-06, "loss": 18.2311, "step": 21689 }, { "epoch": 0.3964757709251101, "grad_norm": 5.448759267322615, "learning_rate": 6.8728050129305975e-06, "loss": 17.1483, "step": 21690 }, { "epoch": 0.39649405012155664, "grad_norm": 5.594364101224168, "learning_rate": 6.872530547077841e-06, "loss": 17.3468, "step": 21691 }, { "epoch": 0.3965123293180032, "grad_norm": 5.505243172890195, "learning_rate": 6.872256074662027e-06, "loss": 17.0655, "step": 21692 }, { "epoch": 0.3965306085144497, "grad_norm": 5.557656220302946, "learning_rate": 6.8719815956841196e-06, "loss": 17.3279, "step": 21693 }, { "epoch": 0.39654888771089625, "grad_norm": 6.88965028193292, "learning_rate": 6.8717071101450785e-06, "loss": 17.6607, "step": 21694 }, { "epoch": 0.3965671669073427, "grad_norm": 5.57957657299959, "learning_rate": 6.871432618045864e-06, "loss": 17.1442, "step": 21695 }, { "epoch": 0.39658544610378926, "grad_norm": 6.508591444589206, "learning_rate": 6.871158119387442e-06, "loss": 17.5329, "step": 21696 }, { "epoch": 0.3966037253002358, "grad_norm": 7.373474480849557, "learning_rate": 6.870883614170774e-06, "loss": 17.3962, "step": 21697 }, { "epoch": 0.39662200449668233, "grad_norm": 5.911328579089407, "learning_rate": 6.8706091023968215e-06, "loss": 17.3305, "step": 21698 }, { "epoch": 0.39664028369312887, "grad_norm": 5.494601161598914, "learning_rate": 6.870334584066546e-06, "loss": 17.2719, "step": 21699 }, { "epoch": 0.39665856288957535, "grad_norm": 7.0876504129624385, "learning_rate": 6.87006005918091e-06, "loss": 17.8883, "step": 21700 }, { "epoch": 0.3966768420860219, "grad_norm": 6.69949522211074, "learning_rate": 6.8697855277408756e-06, "loss": 17.6359, "step": 21701 }, { "epoch": 0.3966951212824684, "grad_norm": 7.254559317539576, "learning_rate": 6.869510989747404e-06, "loss": 17.7826, "step": 21702 }, { "epoch": 0.39671340047891496, "grad_norm": 7.387419531232121, "learning_rate": 6.869236445201462e-06, "loss": 17.9149, "step": 21703 }, { "epoch": 0.3967316796753615, "grad_norm": 6.903547161426229, "learning_rate": 6.8689618941040045e-06, "loss": 17.7069, "step": 21704 }, { "epoch": 0.396749958871808, "grad_norm": 6.6084817588078755, "learning_rate": 6.868687336455999e-06, "loss": 17.6637, "step": 21705 }, { "epoch": 0.3967682380682545, "grad_norm": 6.407653100612558, "learning_rate": 6.868412772258407e-06, "loss": 17.4011, "step": 21706 }, { "epoch": 0.39678651726470104, "grad_norm": 6.483539091650716, "learning_rate": 6.868138201512191e-06, "loss": 17.5027, "step": 21707 }, { "epoch": 0.3968047964611476, "grad_norm": 6.656166325296128, "learning_rate": 6.867863624218313e-06, "loss": 17.9308, "step": 21708 }, { "epoch": 0.3968230756575941, "grad_norm": 6.37100398672029, "learning_rate": 6.867589040377734e-06, "loss": 17.5063, "step": 21709 }, { "epoch": 0.3968413548540406, "grad_norm": 6.419081622356355, "learning_rate": 6.867314449991418e-06, "loss": 17.6277, "step": 21710 }, { "epoch": 0.39685963405048713, "grad_norm": 7.351393233610686, "learning_rate": 6.867039853060326e-06, "loss": 17.8415, "step": 21711 }, { "epoch": 0.39687791324693367, "grad_norm": 8.340268986995127, "learning_rate": 6.866765249585422e-06, "loss": 18.6467, "step": 21712 }, { "epoch": 0.3968961924433802, "grad_norm": 5.347399172394467, "learning_rate": 6.866490639567667e-06, "loss": 16.9904, "step": 21713 }, { "epoch": 0.39691447163982674, "grad_norm": 6.333002298780255, "learning_rate": 6.8662160230080254e-06, "loss": 17.5916, "step": 21714 }, { "epoch": 0.3969327508362732, "grad_norm": 6.659012769856985, "learning_rate": 6.8659413999074574e-06, "loss": 17.5251, "step": 21715 }, { "epoch": 0.39695103003271975, "grad_norm": 6.221801363568825, "learning_rate": 6.865666770266928e-06, "loss": 17.5295, "step": 21716 }, { "epoch": 0.3969693092291663, "grad_norm": 7.381124587032561, "learning_rate": 6.865392134087398e-06, "loss": 17.8972, "step": 21717 }, { "epoch": 0.3969875884256128, "grad_norm": 7.784792840951576, "learning_rate": 6.86511749136983e-06, "loss": 18.134, "step": 21718 }, { "epoch": 0.39700586762205936, "grad_norm": 5.986191625320928, "learning_rate": 6.864842842115187e-06, "loss": 17.208, "step": 21719 }, { "epoch": 0.39702414681850584, "grad_norm": 6.727676440293558, "learning_rate": 6.864568186324432e-06, "loss": 17.936, "step": 21720 }, { "epoch": 0.3970424260149524, "grad_norm": 5.727698637854074, "learning_rate": 6.864293523998529e-06, "loss": 17.2057, "step": 21721 }, { "epoch": 0.3970607052113989, "grad_norm": 6.279105869482758, "learning_rate": 6.864018855138436e-06, "loss": 17.5633, "step": 21722 }, { "epoch": 0.39707898440784545, "grad_norm": 6.476167616880967, "learning_rate": 6.86374417974512e-06, "loss": 17.5121, "step": 21723 }, { "epoch": 0.39709726360429193, "grad_norm": 7.9602930994660746, "learning_rate": 6.8634694978195436e-06, "loss": 18.1208, "step": 21724 }, { "epoch": 0.39711554280073846, "grad_norm": 7.379102487241958, "learning_rate": 6.863194809362666e-06, "loss": 17.898, "step": 21725 }, { "epoch": 0.397133821997185, "grad_norm": 8.96666518078975, "learning_rate": 6.862920114375453e-06, "loss": 18.7539, "step": 21726 }, { "epoch": 0.39715210119363153, "grad_norm": 6.3219931753442, "learning_rate": 6.862645412858867e-06, "loss": 17.4428, "step": 21727 }, { "epoch": 0.39717038039007807, "grad_norm": 6.353760273395351, "learning_rate": 6.862370704813871e-06, "loss": 17.3138, "step": 21728 }, { "epoch": 0.39718865958652455, "grad_norm": 5.657058150834073, "learning_rate": 6.862095990241426e-06, "loss": 17.1173, "step": 21729 }, { "epoch": 0.3972069387829711, "grad_norm": 6.049000545032745, "learning_rate": 6.861821269142498e-06, "loss": 17.4315, "step": 21730 }, { "epoch": 0.3972252179794176, "grad_norm": 8.969556896900917, "learning_rate": 6.8615465415180475e-06, "loss": 18.502, "step": 21731 }, { "epoch": 0.39724349717586416, "grad_norm": 6.173650364088976, "learning_rate": 6.861271807369037e-06, "loss": 17.4822, "step": 21732 }, { "epoch": 0.3972617763723107, "grad_norm": 6.580220131505144, "learning_rate": 6.86099706669643e-06, "loss": 17.5246, "step": 21733 }, { "epoch": 0.3972800555687572, "grad_norm": 6.1567155291020494, "learning_rate": 6.8607223195011915e-06, "loss": 17.4647, "step": 21734 }, { "epoch": 0.3972983347652037, "grad_norm": 7.324932987615221, "learning_rate": 6.860447565784281e-06, "loss": 18.156, "step": 21735 }, { "epoch": 0.39731661396165024, "grad_norm": 8.103877960545196, "learning_rate": 6.8601728055466635e-06, "loss": 18.2347, "step": 21736 }, { "epoch": 0.3973348931580968, "grad_norm": 6.70610024045635, "learning_rate": 6.859898038789301e-06, "loss": 17.7807, "step": 21737 }, { "epoch": 0.3973531723545433, "grad_norm": 5.7562338435786415, "learning_rate": 6.85962326551316e-06, "loss": 17.2446, "step": 21738 }, { "epoch": 0.3973714515509898, "grad_norm": 7.755936289396021, "learning_rate": 6.8593484857192e-06, "loss": 17.9505, "step": 21739 }, { "epoch": 0.39738973074743633, "grad_norm": 6.438809062253919, "learning_rate": 6.859073699408383e-06, "loss": 17.6234, "step": 21740 }, { "epoch": 0.39740800994388287, "grad_norm": 6.267715065425261, "learning_rate": 6.858798906581675e-06, "loss": 17.6766, "step": 21741 }, { "epoch": 0.3974262891403294, "grad_norm": 7.4280055700738306, "learning_rate": 6.858524107240039e-06, "loss": 17.7466, "step": 21742 }, { "epoch": 0.39744456833677594, "grad_norm": 6.236803296639654, "learning_rate": 6.858249301384437e-06, "loss": 17.3597, "step": 21743 }, { "epoch": 0.3974628475332224, "grad_norm": 6.489969263565319, "learning_rate": 6.8579744890158305e-06, "loss": 17.6892, "step": 21744 }, { "epoch": 0.39748112672966895, "grad_norm": 5.842214122340492, "learning_rate": 6.857699670135186e-06, "loss": 17.2405, "step": 21745 }, { "epoch": 0.3974994059261155, "grad_norm": 7.86180178100099, "learning_rate": 6.857424844743465e-06, "loss": 18.4659, "step": 21746 }, { "epoch": 0.397517685122562, "grad_norm": 7.684834211311984, "learning_rate": 6.857150012841633e-06, "loss": 18.1623, "step": 21747 }, { "epoch": 0.39753596431900856, "grad_norm": 4.927592761924374, "learning_rate": 6.8568751744306505e-06, "loss": 16.9394, "step": 21748 }, { "epoch": 0.39755424351545504, "grad_norm": 6.420163071271816, "learning_rate": 6.85660032951148e-06, "loss": 17.5458, "step": 21749 }, { "epoch": 0.3975725227119016, "grad_norm": 6.5515169109406415, "learning_rate": 6.856325478085087e-06, "loss": 17.6797, "step": 21750 }, { "epoch": 0.3975908019083481, "grad_norm": 6.616783364994676, "learning_rate": 6.856050620152435e-06, "loss": 17.6536, "step": 21751 }, { "epoch": 0.39760908110479465, "grad_norm": 6.223348051198621, "learning_rate": 6.8557757557144874e-06, "loss": 17.3968, "step": 21752 }, { "epoch": 0.3976273603012412, "grad_norm": 6.569699690737651, "learning_rate": 6.855500884772206e-06, "loss": 17.5949, "step": 21753 }, { "epoch": 0.39764563949768766, "grad_norm": 6.85853425702792, "learning_rate": 6.855226007326554e-06, "loss": 17.9053, "step": 21754 }, { "epoch": 0.3976639186941342, "grad_norm": 6.80916540211321, "learning_rate": 6.854951123378497e-06, "loss": 17.6419, "step": 21755 }, { "epoch": 0.39768219789058074, "grad_norm": 5.705231847810381, "learning_rate": 6.854676232928997e-06, "loss": 17.3183, "step": 21756 }, { "epoch": 0.39770047708702727, "grad_norm": 6.397163156391924, "learning_rate": 6.854401335979019e-06, "loss": 17.4293, "step": 21757 }, { "epoch": 0.39771875628347375, "grad_norm": 7.026852961160377, "learning_rate": 6.854126432529523e-06, "loss": 17.9778, "step": 21758 }, { "epoch": 0.3977370354799203, "grad_norm": 6.489656750077679, "learning_rate": 6.853851522581476e-06, "loss": 17.6702, "step": 21759 }, { "epoch": 0.3977553146763668, "grad_norm": 5.046174196276279, "learning_rate": 6.85357660613584e-06, "loss": 16.9933, "step": 21760 }, { "epoch": 0.39777359387281336, "grad_norm": 5.231458690561238, "learning_rate": 6.853301683193579e-06, "loss": 17.0756, "step": 21761 }, { "epoch": 0.3977918730692599, "grad_norm": 5.865646842255912, "learning_rate": 6.853026753755656e-06, "loss": 17.4636, "step": 21762 }, { "epoch": 0.3978101522657064, "grad_norm": 5.577294626414456, "learning_rate": 6.852751817823035e-06, "loss": 17.1527, "step": 21763 }, { "epoch": 0.3978284314621529, "grad_norm": 6.030721027048702, "learning_rate": 6.85247687539668e-06, "loss": 17.4301, "step": 21764 }, { "epoch": 0.39784671065859945, "grad_norm": 5.51079709212565, "learning_rate": 6.8522019264775544e-06, "loss": 17.3503, "step": 21765 }, { "epoch": 0.397864989855046, "grad_norm": 6.2641449921534775, "learning_rate": 6.851926971066623e-06, "loss": 17.3794, "step": 21766 }, { "epoch": 0.3978832690514925, "grad_norm": 6.393330466474008, "learning_rate": 6.851652009164846e-06, "loss": 17.3104, "step": 21767 }, { "epoch": 0.397901548247939, "grad_norm": 5.537558818735297, "learning_rate": 6.851377040773189e-06, "loss": 17.3696, "step": 21768 }, { "epoch": 0.39791982744438553, "grad_norm": 6.64380040405659, "learning_rate": 6.851102065892618e-06, "loss": 17.5596, "step": 21769 }, { "epoch": 0.39793810664083207, "grad_norm": 6.084923088989286, "learning_rate": 6.850827084524094e-06, "loss": 17.4808, "step": 21770 }, { "epoch": 0.3979563858372786, "grad_norm": 6.786046481175895, "learning_rate": 6.850552096668583e-06, "loss": 16.8464, "step": 21771 }, { "epoch": 0.39797466503372514, "grad_norm": 7.921359138492886, "learning_rate": 6.850277102327047e-06, "loss": 17.8138, "step": 21772 }, { "epoch": 0.3979929442301716, "grad_norm": 6.3057469313954275, "learning_rate": 6.8500021015004495e-06, "loss": 17.6966, "step": 21773 }, { "epoch": 0.39801122342661815, "grad_norm": 5.978307516896791, "learning_rate": 6.849727094189755e-06, "loss": 17.4407, "step": 21774 }, { "epoch": 0.3980295026230647, "grad_norm": 5.361668479899914, "learning_rate": 6.849452080395928e-06, "loss": 16.9982, "step": 21775 }, { "epoch": 0.3980477818195112, "grad_norm": 5.436084862090296, "learning_rate": 6.849177060119931e-06, "loss": 17.0593, "step": 21776 }, { "epoch": 0.39806606101595776, "grad_norm": 7.219011796924425, "learning_rate": 6.84890203336273e-06, "loss": 17.4976, "step": 21777 }, { "epoch": 0.39808434021240424, "grad_norm": 6.542009008742553, "learning_rate": 6.848627000125288e-06, "loss": 17.3236, "step": 21778 }, { "epoch": 0.3981026194088508, "grad_norm": 6.471159757398091, "learning_rate": 6.8483519604085695e-06, "loss": 17.8908, "step": 21779 }, { "epoch": 0.3981208986052973, "grad_norm": 8.547507575517846, "learning_rate": 6.848076914213536e-06, "loss": 18.24, "step": 21780 }, { "epoch": 0.39813917780174385, "grad_norm": 5.05408154870597, "learning_rate": 6.847801861541154e-06, "loss": 16.8201, "step": 21781 }, { "epoch": 0.3981574569981904, "grad_norm": 6.572162405922015, "learning_rate": 6.847526802392386e-06, "loss": 17.6547, "step": 21782 }, { "epoch": 0.39817573619463686, "grad_norm": 5.963744775318282, "learning_rate": 6.847251736768199e-06, "loss": 17.384, "step": 21783 }, { "epoch": 0.3981940153910834, "grad_norm": 5.2809965346776515, "learning_rate": 6.846976664669553e-06, "loss": 17.0352, "step": 21784 }, { "epoch": 0.39821229458752994, "grad_norm": 7.468870171175749, "learning_rate": 6.846701586097415e-06, "loss": 17.8976, "step": 21785 }, { "epoch": 0.39823057378397647, "grad_norm": 5.501253658388204, "learning_rate": 6.846426501052748e-06, "loss": 17.2988, "step": 21786 }, { "epoch": 0.398248852980423, "grad_norm": 8.552318000196248, "learning_rate": 6.846151409536516e-06, "loss": 18.2101, "step": 21787 }, { "epoch": 0.3982671321768695, "grad_norm": 5.161676492978516, "learning_rate": 6.845876311549684e-06, "loss": 17.0767, "step": 21788 }, { "epoch": 0.398285411373316, "grad_norm": 6.1466095472310425, "learning_rate": 6.845601207093215e-06, "loss": 17.4433, "step": 21789 }, { "epoch": 0.39830369056976256, "grad_norm": 6.246326853145241, "learning_rate": 6.845326096168074e-06, "loss": 17.5201, "step": 21790 }, { "epoch": 0.3983219697662091, "grad_norm": 7.085188300824559, "learning_rate": 6.8450509787752255e-06, "loss": 18.0179, "step": 21791 }, { "epoch": 0.3983402489626556, "grad_norm": 6.9875426997890075, "learning_rate": 6.844775854915633e-06, "loss": 17.6406, "step": 21792 }, { "epoch": 0.3983585281591021, "grad_norm": 4.239942086341225, "learning_rate": 6.8445007245902625e-06, "loss": 16.7594, "step": 21793 }, { "epoch": 0.39837680735554865, "grad_norm": 6.171533553468372, "learning_rate": 6.844225587800077e-06, "loss": 17.6613, "step": 21794 }, { "epoch": 0.3983950865519952, "grad_norm": 6.560016173062775, "learning_rate": 6.843950444546039e-06, "loss": 17.5954, "step": 21795 }, { "epoch": 0.3984133657484417, "grad_norm": 6.237759203957921, "learning_rate": 6.843675294829115e-06, "loss": 17.7816, "step": 21796 }, { "epoch": 0.3984316449448882, "grad_norm": 7.600823146795452, "learning_rate": 6.843400138650271e-06, "loss": 17.7881, "step": 21797 }, { "epoch": 0.39844992414133473, "grad_norm": 8.109419437101522, "learning_rate": 6.843124976010469e-06, "loss": 17.6862, "step": 21798 }, { "epoch": 0.39846820333778127, "grad_norm": 6.655250879466462, "learning_rate": 6.842849806910672e-06, "loss": 17.8075, "step": 21799 }, { "epoch": 0.3984864825342278, "grad_norm": 5.3213564636102735, "learning_rate": 6.8425746313518485e-06, "loss": 17.0101, "step": 21800 }, { "epoch": 0.39850476173067434, "grad_norm": 8.147329827383535, "learning_rate": 6.842299449334959e-06, "loss": 17.7536, "step": 21801 }, { "epoch": 0.3985230409271208, "grad_norm": 5.804868824451375, "learning_rate": 6.842024260860971e-06, "loss": 17.4383, "step": 21802 }, { "epoch": 0.39854132012356736, "grad_norm": 6.1724928884144665, "learning_rate": 6.841749065930847e-06, "loss": 17.2015, "step": 21803 }, { "epoch": 0.3985595993200139, "grad_norm": 6.715634459644148, "learning_rate": 6.841473864545553e-06, "loss": 17.6493, "step": 21804 }, { "epoch": 0.3985778785164604, "grad_norm": 6.679022305429887, "learning_rate": 6.841198656706053e-06, "loss": 17.8111, "step": 21805 }, { "epoch": 0.39859615771290696, "grad_norm": 7.257315299953628, "learning_rate": 6.840923442413311e-06, "loss": 17.1689, "step": 21806 }, { "epoch": 0.39861443690935344, "grad_norm": 8.573541639910738, "learning_rate": 6.8406482216682934e-06, "loss": 18.0391, "step": 21807 }, { "epoch": 0.3986327161058, "grad_norm": 6.4849479840258555, "learning_rate": 6.840372994471961e-06, "loss": 17.541, "step": 21808 }, { "epoch": 0.3986509953022465, "grad_norm": 8.349189848762084, "learning_rate": 6.8400977608252814e-06, "loss": 18.3481, "step": 21809 }, { "epoch": 0.39866927449869305, "grad_norm": 10.121523300115816, "learning_rate": 6.839822520729221e-06, "loss": 18.6284, "step": 21810 }, { "epoch": 0.3986875536951396, "grad_norm": 7.817159093429263, "learning_rate": 6.839547274184741e-06, "loss": 17.7238, "step": 21811 }, { "epoch": 0.39870583289158606, "grad_norm": 7.011889955934559, "learning_rate": 6.8392720211928075e-06, "loss": 17.7431, "step": 21812 }, { "epoch": 0.3987241120880326, "grad_norm": 6.156123959449216, "learning_rate": 6.838996761754384e-06, "loss": 17.685, "step": 21813 }, { "epoch": 0.39874239128447914, "grad_norm": 6.015380396421697, "learning_rate": 6.838721495870438e-06, "loss": 17.2928, "step": 21814 }, { "epoch": 0.39876067048092567, "grad_norm": 5.2348778933493145, "learning_rate": 6.838446223541933e-06, "loss": 17.0707, "step": 21815 }, { "epoch": 0.3987789496773722, "grad_norm": 5.51539712070171, "learning_rate": 6.838170944769833e-06, "loss": 17.2077, "step": 21816 }, { "epoch": 0.3987972288738187, "grad_norm": 6.562161452212529, "learning_rate": 6.837895659555103e-06, "loss": 17.5551, "step": 21817 }, { "epoch": 0.3988155080702652, "grad_norm": 5.654587977776979, "learning_rate": 6.837620367898708e-06, "loss": 17.2969, "step": 21818 }, { "epoch": 0.39883378726671176, "grad_norm": 7.475892527216948, "learning_rate": 6.837345069801613e-06, "loss": 17.2382, "step": 21819 }, { "epoch": 0.3988520664631583, "grad_norm": 7.995946912514331, "learning_rate": 6.837069765264783e-06, "loss": 18.3482, "step": 21820 }, { "epoch": 0.39887034565960483, "grad_norm": 7.497912190028149, "learning_rate": 6.8367944542891854e-06, "loss": 18.0156, "step": 21821 }, { "epoch": 0.3988886248560513, "grad_norm": 6.730666641172929, "learning_rate": 6.836519136875779e-06, "loss": 17.6704, "step": 21822 }, { "epoch": 0.39890690405249785, "grad_norm": 6.453252477793417, "learning_rate": 6.836243813025534e-06, "loss": 17.8398, "step": 21823 }, { "epoch": 0.3989251832489444, "grad_norm": 6.75137426284157, "learning_rate": 6.835968482739415e-06, "loss": 17.5854, "step": 21824 }, { "epoch": 0.3989434624453909, "grad_norm": 8.37417003563625, "learning_rate": 6.835693146018384e-06, "loss": 18.5165, "step": 21825 }, { "epoch": 0.3989617416418374, "grad_norm": 6.2616493045157124, "learning_rate": 6.8354178028634084e-06, "loss": 17.6568, "step": 21826 }, { "epoch": 0.39898002083828393, "grad_norm": 8.738659647010948, "learning_rate": 6.8351424532754515e-06, "loss": 18.6223, "step": 21827 }, { "epoch": 0.39899830003473047, "grad_norm": 6.4634686275494095, "learning_rate": 6.834867097255482e-06, "loss": 17.3887, "step": 21828 }, { "epoch": 0.399016579231177, "grad_norm": 6.354428864865202, "learning_rate": 6.834591734804461e-06, "loss": 17.5772, "step": 21829 }, { "epoch": 0.39903485842762354, "grad_norm": 7.608988584982392, "learning_rate": 6.834316365923355e-06, "loss": 18.2136, "step": 21830 }, { "epoch": 0.39905313762407, "grad_norm": 5.989923457364409, "learning_rate": 6.834040990613129e-06, "loss": 17.2518, "step": 21831 }, { "epoch": 0.39907141682051656, "grad_norm": 7.747319358810052, "learning_rate": 6.83376560887475e-06, "loss": 18.2965, "step": 21832 }, { "epoch": 0.3990896960169631, "grad_norm": 7.6421741538121095, "learning_rate": 6.8334902207091804e-06, "loss": 17.9195, "step": 21833 }, { "epoch": 0.3991079752134096, "grad_norm": 6.326266384956976, "learning_rate": 6.833214826117387e-06, "loss": 17.6251, "step": 21834 }, { "epoch": 0.39912625440985616, "grad_norm": 5.960369971268348, "learning_rate": 6.832939425100336e-06, "loss": 17.3625, "step": 21835 }, { "epoch": 0.39914453360630264, "grad_norm": 5.766202353274833, "learning_rate": 6.832664017658988e-06, "loss": 17.0969, "step": 21836 }, { "epoch": 0.3991628128027492, "grad_norm": 7.035487552730544, "learning_rate": 6.832388603794315e-06, "loss": 17.8479, "step": 21837 }, { "epoch": 0.3991810919991957, "grad_norm": 5.930590917364918, "learning_rate": 6.832113183507278e-06, "loss": 17.346, "step": 21838 }, { "epoch": 0.39919937119564225, "grad_norm": 5.197521251504096, "learning_rate": 6.831837756798842e-06, "loss": 16.8392, "step": 21839 }, { "epoch": 0.3992176503920888, "grad_norm": 5.994831294123699, "learning_rate": 6.831562323669976e-06, "loss": 17.6677, "step": 21840 }, { "epoch": 0.39923592958853527, "grad_norm": 8.895594948925588, "learning_rate": 6.831286884121642e-06, "loss": 17.7985, "step": 21841 }, { "epoch": 0.3992542087849818, "grad_norm": 6.367047764293951, "learning_rate": 6.831011438154805e-06, "loss": 17.4813, "step": 21842 }, { "epoch": 0.39927248798142834, "grad_norm": 6.123505316100374, "learning_rate": 6.8307359857704324e-06, "loss": 17.084, "step": 21843 }, { "epoch": 0.3992907671778749, "grad_norm": 4.732171505471359, "learning_rate": 6.8304605269694904e-06, "loss": 16.8162, "step": 21844 }, { "epoch": 0.3993090463743214, "grad_norm": 6.73229843461191, "learning_rate": 6.8301850617529405e-06, "loss": 17.8678, "step": 21845 }, { "epoch": 0.3993273255707679, "grad_norm": 7.1852536067024175, "learning_rate": 6.829909590121752e-06, "loss": 18.1702, "step": 21846 }, { "epoch": 0.3993456047672144, "grad_norm": 7.180394346522405, "learning_rate": 6.82963411207689e-06, "loss": 17.9493, "step": 21847 }, { "epoch": 0.39936388396366096, "grad_norm": 4.71416536056585, "learning_rate": 6.82935862761932e-06, "loss": 16.8741, "step": 21848 }, { "epoch": 0.3993821631601075, "grad_norm": 6.544736596933013, "learning_rate": 6.8290831367500055e-06, "loss": 17.6977, "step": 21849 }, { "epoch": 0.39940044235655403, "grad_norm": 5.202792661730387, "learning_rate": 6.828807639469914e-06, "loss": 17.0273, "step": 21850 }, { "epoch": 0.3994187215530005, "grad_norm": 5.843070617820656, "learning_rate": 6.828532135780008e-06, "loss": 17.3643, "step": 21851 }, { "epoch": 0.39943700074944705, "grad_norm": 7.029189764478648, "learning_rate": 6.8282566256812584e-06, "loss": 17.6621, "step": 21852 }, { "epoch": 0.3994552799458936, "grad_norm": 10.042292797677177, "learning_rate": 6.827981109174627e-06, "loss": 18.8844, "step": 21853 }, { "epoch": 0.3994735591423401, "grad_norm": 5.584993858822853, "learning_rate": 6.82770558626108e-06, "loss": 17.2078, "step": 21854 }, { "epoch": 0.39949183833878665, "grad_norm": 7.134501801572942, "learning_rate": 6.8274300569415845e-06, "loss": 17.7828, "step": 21855 }, { "epoch": 0.39951011753523313, "grad_norm": 6.869150866445629, "learning_rate": 6.827154521217104e-06, "loss": 17.4381, "step": 21856 }, { "epoch": 0.39952839673167967, "grad_norm": 6.065259526390333, "learning_rate": 6.826878979088607e-06, "loss": 17.2889, "step": 21857 }, { "epoch": 0.3995466759281262, "grad_norm": 7.516231241164341, "learning_rate": 6.826603430557056e-06, "loss": 18.2564, "step": 21858 }, { "epoch": 0.39956495512457274, "grad_norm": 7.000265027217838, "learning_rate": 6.826327875623419e-06, "loss": 17.8884, "step": 21859 }, { "epoch": 0.3995832343210192, "grad_norm": 5.977487675097315, "learning_rate": 6.826052314288662e-06, "loss": 17.355, "step": 21860 }, { "epoch": 0.39960151351746576, "grad_norm": 6.090132413359348, "learning_rate": 6.8257767465537496e-06, "loss": 17.4178, "step": 21861 }, { "epoch": 0.3996197927139123, "grad_norm": 6.309985701706487, "learning_rate": 6.825501172419649e-06, "loss": 17.4804, "step": 21862 }, { "epoch": 0.3996380719103588, "grad_norm": 6.169446371465011, "learning_rate": 6.825225591887323e-06, "loss": 17.4456, "step": 21863 }, { "epoch": 0.39965635110680536, "grad_norm": 5.208395078947912, "learning_rate": 6.824950004957741e-06, "loss": 16.8539, "step": 21864 }, { "epoch": 0.39967463030325184, "grad_norm": 6.798308654497195, "learning_rate": 6.824674411631868e-06, "loss": 17.6094, "step": 21865 }, { "epoch": 0.3996929094996984, "grad_norm": 6.468971865610881, "learning_rate": 6.82439881191067e-06, "loss": 17.5193, "step": 21866 }, { "epoch": 0.3997111886961449, "grad_norm": 6.129154578278078, "learning_rate": 6.824123205795111e-06, "loss": 17.3764, "step": 21867 }, { "epoch": 0.39972946789259145, "grad_norm": 6.4687796845339545, "learning_rate": 6.823847593286159e-06, "loss": 17.591, "step": 21868 }, { "epoch": 0.399747747089038, "grad_norm": 7.197323925496892, "learning_rate": 6.8235719743847795e-06, "loss": 17.6638, "step": 21869 }, { "epoch": 0.39976602628548447, "grad_norm": 5.736916691002986, "learning_rate": 6.823296349091939e-06, "loss": 17.1347, "step": 21870 }, { "epoch": 0.399784305481931, "grad_norm": 7.0601501110054645, "learning_rate": 6.823020717408603e-06, "loss": 17.9473, "step": 21871 }, { "epoch": 0.39980258467837754, "grad_norm": 7.433267916188017, "learning_rate": 6.822745079335736e-06, "loss": 17.7632, "step": 21872 }, { "epoch": 0.3998208638748241, "grad_norm": 5.870480293815363, "learning_rate": 6.822469434874307e-06, "loss": 17.1841, "step": 21873 }, { "epoch": 0.3998391430712706, "grad_norm": 6.783657703056416, "learning_rate": 6.8221937840252805e-06, "loss": 17.1817, "step": 21874 }, { "epoch": 0.3998574222677171, "grad_norm": 8.472302679485521, "learning_rate": 6.8219181267896225e-06, "loss": 18.1618, "step": 21875 }, { "epoch": 0.3998757014641636, "grad_norm": 7.146008847435934, "learning_rate": 6.821642463168301e-06, "loss": 18.0327, "step": 21876 }, { "epoch": 0.39989398066061016, "grad_norm": 6.834847710285364, "learning_rate": 6.821366793162279e-06, "loss": 17.6565, "step": 21877 }, { "epoch": 0.3999122598570567, "grad_norm": 5.498396193659899, "learning_rate": 6.8210911167725256e-06, "loss": 17.1138, "step": 21878 }, { "epoch": 0.39993053905350323, "grad_norm": 6.8552746884165146, "learning_rate": 6.820815434000007e-06, "loss": 17.6906, "step": 21879 }, { "epoch": 0.3999488182499497, "grad_norm": 6.639894723167643, "learning_rate": 6.8205397448456865e-06, "loss": 17.4614, "step": 21880 }, { "epoch": 0.39996709744639625, "grad_norm": 6.37637902059711, "learning_rate": 6.820264049310531e-06, "loss": 17.404, "step": 21881 }, { "epoch": 0.3999853766428428, "grad_norm": 6.069914179531985, "learning_rate": 6.8199883473955094e-06, "loss": 17.3451, "step": 21882 }, { "epoch": 0.4000036558392893, "grad_norm": 7.420894879857651, "learning_rate": 6.819712639101588e-06, "loss": 18.2089, "step": 21883 }, { "epoch": 0.40002193503573585, "grad_norm": 8.62891725858039, "learning_rate": 6.8194369244297294e-06, "loss": 18.5057, "step": 21884 }, { "epoch": 0.40004021423218233, "grad_norm": 6.593069105903263, "learning_rate": 6.819161203380903e-06, "loss": 17.5398, "step": 21885 }, { "epoch": 0.40005849342862887, "grad_norm": 7.625733575239709, "learning_rate": 6.818885475956075e-06, "loss": 18.1123, "step": 21886 }, { "epoch": 0.4000767726250754, "grad_norm": 6.542194823489286, "learning_rate": 6.81860974215621e-06, "loss": 17.9121, "step": 21887 }, { "epoch": 0.40009505182152194, "grad_norm": 5.649682944877462, "learning_rate": 6.8183340019822774e-06, "loss": 17.164, "step": 21888 }, { "epoch": 0.4001133310179685, "grad_norm": 8.18719447903527, "learning_rate": 6.81805825543524e-06, "loss": 18.4074, "step": 21889 }, { "epoch": 0.40013161021441496, "grad_norm": 5.971442482873152, "learning_rate": 6.817782502516068e-06, "loss": 17.4632, "step": 21890 }, { "epoch": 0.4001498894108615, "grad_norm": 7.544173298075092, "learning_rate": 6.817506743225725e-06, "loss": 18.1639, "step": 21891 }, { "epoch": 0.40016816860730803, "grad_norm": 6.585524425248248, "learning_rate": 6.817230977565179e-06, "loss": 17.309, "step": 21892 }, { "epoch": 0.40018644780375456, "grad_norm": 6.47608704865308, "learning_rate": 6.816955205535397e-06, "loss": 17.5814, "step": 21893 }, { "epoch": 0.40020472700020104, "grad_norm": 5.466854866379673, "learning_rate": 6.8166794271373425e-06, "loss": 17.2101, "step": 21894 }, { "epoch": 0.4002230061966476, "grad_norm": 5.8153603545612915, "learning_rate": 6.816403642371985e-06, "loss": 17.5636, "step": 21895 }, { "epoch": 0.4002412853930941, "grad_norm": 6.9351026441863315, "learning_rate": 6.816127851240291e-06, "loss": 17.5025, "step": 21896 }, { "epoch": 0.40025956458954065, "grad_norm": 7.636826122033852, "learning_rate": 6.815852053743227e-06, "loss": 18.0502, "step": 21897 }, { "epoch": 0.4002778437859872, "grad_norm": 6.464902345553054, "learning_rate": 6.815576249881758e-06, "loss": 17.5888, "step": 21898 }, { "epoch": 0.40029612298243367, "grad_norm": 6.749419499798225, "learning_rate": 6.815300439656852e-06, "loss": 17.5443, "step": 21899 }, { "epoch": 0.4003144021788802, "grad_norm": 6.58827908461623, "learning_rate": 6.815024623069476e-06, "loss": 17.5441, "step": 21900 }, { "epoch": 0.40033268137532674, "grad_norm": 5.761502253496959, "learning_rate": 6.8147488001205965e-06, "loss": 17.4418, "step": 21901 }, { "epoch": 0.4003509605717733, "grad_norm": 6.863697161354168, "learning_rate": 6.814472970811179e-06, "loss": 17.8328, "step": 21902 }, { "epoch": 0.4003692397682198, "grad_norm": 5.95862440492216, "learning_rate": 6.814197135142191e-06, "loss": 17.1633, "step": 21903 }, { "epoch": 0.4003875189646663, "grad_norm": 6.644524654873513, "learning_rate": 6.8139212931145995e-06, "loss": 17.3927, "step": 21904 }, { "epoch": 0.4004057981611128, "grad_norm": 6.860794251018615, "learning_rate": 6.813645444729372e-06, "loss": 18.0471, "step": 21905 }, { "epoch": 0.40042407735755936, "grad_norm": 6.001006930176777, "learning_rate": 6.813369589987474e-06, "loss": 17.3834, "step": 21906 }, { "epoch": 0.4004423565540059, "grad_norm": 6.18695976509492, "learning_rate": 6.813093728889874e-06, "loss": 17.2937, "step": 21907 }, { "epoch": 0.40046063575045243, "grad_norm": 6.4642207578544495, "learning_rate": 6.812817861437536e-06, "loss": 17.6252, "step": 21908 }, { "epoch": 0.4004789149468989, "grad_norm": 6.632278544468539, "learning_rate": 6.8125419876314295e-06, "loss": 17.5642, "step": 21909 }, { "epoch": 0.40049719414334545, "grad_norm": 7.109454895608768, "learning_rate": 6.812266107472522e-06, "loss": 17.7552, "step": 21910 }, { "epoch": 0.400515473339792, "grad_norm": 8.508668659805597, "learning_rate": 6.811990220961779e-06, "loss": 18.43, "step": 21911 }, { "epoch": 0.4005337525362385, "grad_norm": 7.185601574581772, "learning_rate": 6.811714328100165e-06, "loss": 17.9087, "step": 21912 }, { "epoch": 0.40055203173268505, "grad_norm": 5.920070425212109, "learning_rate": 6.81143842888865e-06, "loss": 17.1764, "step": 21913 }, { "epoch": 0.40057031092913153, "grad_norm": 6.966815940783827, "learning_rate": 6.811162523328203e-06, "loss": 17.8286, "step": 21914 }, { "epoch": 0.40058859012557807, "grad_norm": 6.586401880593733, "learning_rate": 6.810886611419787e-06, "loss": 17.8632, "step": 21915 }, { "epoch": 0.4006068693220246, "grad_norm": 6.063754430848439, "learning_rate": 6.81061069316437e-06, "loss": 17.4085, "step": 21916 }, { "epoch": 0.40062514851847114, "grad_norm": 6.32509247933547, "learning_rate": 6.810334768562921e-06, "loss": 17.5364, "step": 21917 }, { "epoch": 0.4006434277149177, "grad_norm": 6.724629586613398, "learning_rate": 6.8100588376164036e-06, "loss": 17.5292, "step": 21918 }, { "epoch": 0.40066170691136416, "grad_norm": 7.667001702775765, "learning_rate": 6.809782900325789e-06, "loss": 18.004, "step": 21919 }, { "epoch": 0.4006799861078107, "grad_norm": 7.118537808752815, "learning_rate": 6.809506956692041e-06, "loss": 17.9814, "step": 21920 }, { "epoch": 0.40069826530425723, "grad_norm": 6.9903226744488745, "learning_rate": 6.809231006716131e-06, "loss": 17.4455, "step": 21921 }, { "epoch": 0.40071654450070376, "grad_norm": 6.728524686486218, "learning_rate": 6.808955050399018e-06, "loss": 17.4874, "step": 21922 }, { "epoch": 0.4007348236971503, "grad_norm": 7.310463906228096, "learning_rate": 6.808679087741679e-06, "loss": 18.2281, "step": 21923 }, { "epoch": 0.4007531028935968, "grad_norm": 7.433994389988249, "learning_rate": 6.808403118745076e-06, "loss": 17.8506, "step": 21924 }, { "epoch": 0.4007713820900433, "grad_norm": 6.082764194422455, "learning_rate": 6.808127143410177e-06, "loss": 17.3403, "step": 21925 }, { "epoch": 0.40078966128648985, "grad_norm": 6.904402641846164, "learning_rate": 6.8078511617379485e-06, "loss": 17.7018, "step": 21926 }, { "epoch": 0.4008079404829364, "grad_norm": 7.1843567484424575, "learning_rate": 6.8075751737293575e-06, "loss": 18.1391, "step": 21927 }, { "epoch": 0.40082621967938287, "grad_norm": 8.693140802240887, "learning_rate": 6.8072991793853734e-06, "loss": 17.9093, "step": 21928 }, { "epoch": 0.4008444988758294, "grad_norm": 7.323779640948805, "learning_rate": 6.807023178706964e-06, "loss": 18.4544, "step": 21929 }, { "epoch": 0.40086277807227594, "grad_norm": 8.73760270559256, "learning_rate": 6.8067471716950935e-06, "loss": 18.1625, "step": 21930 }, { "epoch": 0.4008810572687225, "grad_norm": 8.111036553412072, "learning_rate": 6.8064711583507315e-06, "loss": 18.7604, "step": 21931 }, { "epoch": 0.400899336465169, "grad_norm": 7.337649671092504, "learning_rate": 6.806195138674845e-06, "loss": 17.4618, "step": 21932 }, { "epoch": 0.4009176156616155, "grad_norm": 6.14522667941267, "learning_rate": 6.8059191126684e-06, "loss": 17.2946, "step": 21933 }, { "epoch": 0.400935894858062, "grad_norm": 7.186178180742369, "learning_rate": 6.805643080332366e-06, "loss": 17.9071, "step": 21934 }, { "epoch": 0.40095417405450856, "grad_norm": 7.029971897302851, "learning_rate": 6.80536704166771e-06, "loss": 17.6502, "step": 21935 }, { "epoch": 0.4009724532509551, "grad_norm": 9.513560196022905, "learning_rate": 6.805090996675399e-06, "loss": 18.5135, "step": 21936 }, { "epoch": 0.40099073244740163, "grad_norm": 7.605591732838555, "learning_rate": 6.804814945356401e-06, "loss": 17.6754, "step": 21937 }, { "epoch": 0.4010090116438481, "grad_norm": 6.2261058022499824, "learning_rate": 6.804538887711684e-06, "loss": 17.4833, "step": 21938 }, { "epoch": 0.40102729084029465, "grad_norm": 6.843279889744721, "learning_rate": 6.804262823742214e-06, "loss": 18.0529, "step": 21939 }, { "epoch": 0.4010455700367412, "grad_norm": 6.811844571700934, "learning_rate": 6.803986753448956e-06, "loss": 17.7461, "step": 21940 }, { "epoch": 0.4010638492331877, "grad_norm": 6.2373444016166015, "learning_rate": 6.803710676832887e-06, "loss": 17.7564, "step": 21941 }, { "epoch": 0.40108212842963425, "grad_norm": 7.451427159131176, "learning_rate": 6.803434593894965e-06, "loss": 18.0171, "step": 21942 }, { "epoch": 0.40110040762608073, "grad_norm": 7.006701312109924, "learning_rate": 6.803158504636162e-06, "loss": 17.8858, "step": 21943 }, { "epoch": 0.40111868682252727, "grad_norm": 5.912711778513443, "learning_rate": 6.8028824090574455e-06, "loss": 17.4637, "step": 21944 }, { "epoch": 0.4011369660189738, "grad_norm": 5.719679315294928, "learning_rate": 6.802606307159782e-06, "loss": 17.2627, "step": 21945 }, { "epoch": 0.40115524521542034, "grad_norm": 6.126713852624104, "learning_rate": 6.802330198944138e-06, "loss": 17.4433, "step": 21946 }, { "epoch": 0.4011735244118669, "grad_norm": 5.778026223407568, "learning_rate": 6.802054084411486e-06, "loss": 17.0654, "step": 21947 }, { "epoch": 0.40119180360831336, "grad_norm": 6.839335613346102, "learning_rate": 6.80177796356279e-06, "loss": 17.4037, "step": 21948 }, { "epoch": 0.4012100828047599, "grad_norm": 6.688918250827921, "learning_rate": 6.801501836399017e-06, "loss": 17.4062, "step": 21949 }, { "epoch": 0.40122836200120643, "grad_norm": 5.431568159693095, "learning_rate": 6.801225702921138e-06, "loss": 17.3574, "step": 21950 }, { "epoch": 0.40124664119765296, "grad_norm": 6.668323124251579, "learning_rate": 6.800949563130119e-06, "loss": 17.6149, "step": 21951 }, { "epoch": 0.4012649203940995, "grad_norm": 6.999382372145549, "learning_rate": 6.8006734170269284e-06, "loss": 17.7897, "step": 21952 }, { "epoch": 0.401283199590546, "grad_norm": 6.190362614469207, "learning_rate": 6.800397264612533e-06, "loss": 17.9581, "step": 21953 }, { "epoch": 0.4013014787869925, "grad_norm": 5.57772871319978, "learning_rate": 6.8001211058879e-06, "loss": 17.3105, "step": 21954 }, { "epoch": 0.40131975798343905, "grad_norm": 5.700006419530701, "learning_rate": 6.799844940854002e-06, "loss": 17.1836, "step": 21955 }, { "epoch": 0.4013380371798856, "grad_norm": 6.858696783415418, "learning_rate": 6.799568769511802e-06, "loss": 17.7469, "step": 21956 }, { "epoch": 0.4013563163763321, "grad_norm": 6.797137546460257, "learning_rate": 6.7992925918622696e-06, "loss": 17.7856, "step": 21957 }, { "epoch": 0.4013745955727786, "grad_norm": 5.138139047181146, "learning_rate": 6.799016407906372e-06, "loss": 16.875, "step": 21958 }, { "epoch": 0.40139287476922514, "grad_norm": 7.769990713000648, "learning_rate": 6.79874021764508e-06, "loss": 17.2655, "step": 21959 }, { "epoch": 0.4014111539656717, "grad_norm": 5.793944453016079, "learning_rate": 6.7984640210793586e-06, "loss": 17.2418, "step": 21960 }, { "epoch": 0.4014294331621182, "grad_norm": 6.6119078654034515, "learning_rate": 6.798187818210176e-06, "loss": 17.6804, "step": 21961 }, { "epoch": 0.4014477123585647, "grad_norm": 6.897665729366046, "learning_rate": 6.797911609038503e-06, "loss": 17.5899, "step": 21962 }, { "epoch": 0.4014659915550112, "grad_norm": 6.964162749019687, "learning_rate": 6.797635393565304e-06, "loss": 17.151, "step": 21963 }, { "epoch": 0.40148427075145776, "grad_norm": 5.771019717862557, "learning_rate": 6.797359171791549e-06, "loss": 17.306, "step": 21964 }, { "epoch": 0.4015025499479043, "grad_norm": 6.331485271180629, "learning_rate": 6.797082943718207e-06, "loss": 17.2906, "step": 21965 }, { "epoch": 0.40152082914435083, "grad_norm": 5.79659720268587, "learning_rate": 6.796806709346246e-06, "loss": 17.4377, "step": 21966 }, { "epoch": 0.4015391083407973, "grad_norm": 7.211064151600302, "learning_rate": 6.796530468676632e-06, "loss": 17.4476, "step": 21967 }, { "epoch": 0.40155738753724385, "grad_norm": 7.455823922053474, "learning_rate": 6.796254221710335e-06, "loss": 17.9749, "step": 21968 }, { "epoch": 0.4015756667336904, "grad_norm": 5.516866774663918, "learning_rate": 6.795977968448323e-06, "loss": 17.2624, "step": 21969 }, { "epoch": 0.4015939459301369, "grad_norm": 5.234600543069211, "learning_rate": 6.795701708891563e-06, "loss": 17.0726, "step": 21970 }, { "epoch": 0.40161222512658346, "grad_norm": 5.393485636893945, "learning_rate": 6.795425443041026e-06, "loss": 16.8985, "step": 21971 }, { "epoch": 0.40163050432302994, "grad_norm": 7.261848924659315, "learning_rate": 6.795149170897677e-06, "loss": 18.2308, "step": 21972 }, { "epoch": 0.40164878351947647, "grad_norm": 5.681941304243241, "learning_rate": 6.794872892462487e-06, "loss": 17.3867, "step": 21973 }, { "epoch": 0.401667062715923, "grad_norm": 6.784338920543298, "learning_rate": 6.794596607736423e-06, "loss": 17.6111, "step": 21974 }, { "epoch": 0.40168534191236954, "grad_norm": 6.381183413292857, "learning_rate": 6.794320316720453e-06, "loss": 17.5785, "step": 21975 }, { "epoch": 0.4017036211088161, "grad_norm": 6.472831603511827, "learning_rate": 6.794044019415547e-06, "loss": 17.6216, "step": 21976 }, { "epoch": 0.40172190030526256, "grad_norm": 8.033889329602548, "learning_rate": 6.793767715822672e-06, "loss": 17.621, "step": 21977 }, { "epoch": 0.4017401795017091, "grad_norm": 7.652435122747328, "learning_rate": 6.793491405942797e-06, "loss": 18.1178, "step": 21978 }, { "epoch": 0.40175845869815563, "grad_norm": 5.7795189774144795, "learning_rate": 6.7932150897768914e-06, "loss": 17.304, "step": 21979 }, { "epoch": 0.40177673789460217, "grad_norm": 7.5296212022012545, "learning_rate": 6.792938767325921e-06, "loss": 18.2468, "step": 21980 }, { "epoch": 0.4017950170910487, "grad_norm": 7.349797483946474, "learning_rate": 6.792662438590854e-06, "loss": 17.8214, "step": 21981 }, { "epoch": 0.4018132962874952, "grad_norm": 7.531457348741944, "learning_rate": 6.792386103572663e-06, "loss": 17.9705, "step": 21982 }, { "epoch": 0.4018315754839417, "grad_norm": 5.983560340242937, "learning_rate": 6.792109762272315e-06, "loss": 17.3376, "step": 21983 }, { "epoch": 0.40184985468038825, "grad_norm": 8.365451107347186, "learning_rate": 6.791833414690776e-06, "loss": 18.2873, "step": 21984 }, { "epoch": 0.4018681338768348, "grad_norm": 6.06164443050326, "learning_rate": 6.791557060829017e-06, "loss": 17.4937, "step": 21985 }, { "epoch": 0.4018864130732813, "grad_norm": 5.389488352470851, "learning_rate": 6.791280700688006e-06, "loss": 17.1742, "step": 21986 }, { "epoch": 0.4019046922697278, "grad_norm": 6.035099688578071, "learning_rate": 6.7910043342687124e-06, "loss": 17.2362, "step": 21987 }, { "epoch": 0.40192297146617434, "grad_norm": 6.240104426224746, "learning_rate": 6.790727961572103e-06, "loss": 17.3403, "step": 21988 }, { "epoch": 0.4019412506626209, "grad_norm": 4.771364297154467, "learning_rate": 6.790451582599148e-06, "loss": 16.7517, "step": 21989 }, { "epoch": 0.4019595298590674, "grad_norm": 6.414069119573712, "learning_rate": 6.790175197350814e-06, "loss": 17.4858, "step": 21990 }, { "epoch": 0.40197780905551395, "grad_norm": 5.394892539709728, "learning_rate": 6.789898805828074e-06, "loss": 16.9978, "step": 21991 }, { "epoch": 0.4019960882519604, "grad_norm": 5.559454598292766, "learning_rate": 6.789622408031893e-06, "loss": 17.1492, "step": 21992 }, { "epoch": 0.40201436744840696, "grad_norm": 7.353267904120058, "learning_rate": 6.7893460039632404e-06, "loss": 18.0896, "step": 21993 }, { "epoch": 0.4020326466448535, "grad_norm": 6.918933038129437, "learning_rate": 6.789069593623085e-06, "loss": 17.9874, "step": 21994 }, { "epoch": 0.40205092584130003, "grad_norm": 5.892259725242188, "learning_rate": 6.788793177012396e-06, "loss": 17.4803, "step": 21995 }, { "epoch": 0.4020692050377465, "grad_norm": 6.42627628804412, "learning_rate": 6.788516754132142e-06, "loss": 17.584, "step": 21996 }, { "epoch": 0.40208748423419305, "grad_norm": 6.86353923249927, "learning_rate": 6.788240324983293e-06, "loss": 17.9212, "step": 21997 }, { "epoch": 0.4021057634306396, "grad_norm": 8.310368469293014, "learning_rate": 6.7879638895668165e-06, "loss": 17.9964, "step": 21998 }, { "epoch": 0.4021240426270861, "grad_norm": 6.562962555960447, "learning_rate": 6.787687447883682e-06, "loss": 17.6401, "step": 21999 }, { "epoch": 0.40214232182353266, "grad_norm": 7.473633228695363, "learning_rate": 6.787410999934857e-06, "loss": 18.0085, "step": 22000 }, { "epoch": 0.40216060101997914, "grad_norm": 5.169612281976841, "learning_rate": 6.787134545721312e-06, "loss": 16.9001, "step": 22001 }, { "epoch": 0.40217888021642567, "grad_norm": 5.74281798225418, "learning_rate": 6.786858085244015e-06, "loss": 17.1053, "step": 22002 }, { "epoch": 0.4021971594128722, "grad_norm": 6.11722418767506, "learning_rate": 6.786581618503936e-06, "loss": 17.4146, "step": 22003 }, { "epoch": 0.40221543860931874, "grad_norm": 6.453345918976262, "learning_rate": 6.786305145502043e-06, "loss": 17.4624, "step": 22004 }, { "epoch": 0.4022337178057653, "grad_norm": 5.904459705572975, "learning_rate": 6.786028666239306e-06, "loss": 17.1647, "step": 22005 }, { "epoch": 0.40225199700221176, "grad_norm": 7.64038538335007, "learning_rate": 6.785752180716694e-06, "loss": 17.9963, "step": 22006 }, { "epoch": 0.4022702761986583, "grad_norm": 6.0691043780964105, "learning_rate": 6.785475688935176e-06, "loss": 17.3344, "step": 22007 }, { "epoch": 0.40228855539510483, "grad_norm": 7.882165262024196, "learning_rate": 6.785199190895719e-06, "loss": 18.2275, "step": 22008 }, { "epoch": 0.40230683459155137, "grad_norm": 6.959207615504242, "learning_rate": 6.784922686599295e-06, "loss": 17.6494, "step": 22009 }, { "epoch": 0.4023251137879979, "grad_norm": 6.859123166489989, "learning_rate": 6.7846461760468714e-06, "loss": 17.4516, "step": 22010 }, { "epoch": 0.4023433929844444, "grad_norm": 7.235208318542025, "learning_rate": 6.784369659239418e-06, "loss": 17.4774, "step": 22011 }, { "epoch": 0.4023616721808909, "grad_norm": 5.988047756592985, "learning_rate": 6.784093136177903e-06, "loss": 17.3887, "step": 22012 }, { "epoch": 0.40237995137733745, "grad_norm": 6.956947575994767, "learning_rate": 6.783816606863296e-06, "loss": 17.7987, "step": 22013 }, { "epoch": 0.402398230573784, "grad_norm": 7.0798289717982765, "learning_rate": 6.783540071296568e-06, "loss": 17.8944, "step": 22014 }, { "epoch": 0.4024165097702305, "grad_norm": 6.891298784286834, "learning_rate": 6.783263529478686e-06, "loss": 17.8773, "step": 22015 }, { "epoch": 0.402434788966677, "grad_norm": 5.762533932607662, "learning_rate": 6.782986981410621e-06, "loss": 17.1087, "step": 22016 }, { "epoch": 0.40245306816312354, "grad_norm": 8.80927131911007, "learning_rate": 6.782710427093341e-06, "loss": 18.2545, "step": 22017 }, { "epoch": 0.4024713473595701, "grad_norm": 5.843840912648421, "learning_rate": 6.782433866527815e-06, "loss": 17.2093, "step": 22018 }, { "epoch": 0.4024896265560166, "grad_norm": 8.975227642255565, "learning_rate": 6.782157299715013e-06, "loss": 18.5843, "step": 22019 }, { "epoch": 0.40250790575246315, "grad_norm": 6.92250061229194, "learning_rate": 6.781880726655905e-06, "loss": 17.6077, "step": 22020 }, { "epoch": 0.4025261849489096, "grad_norm": 6.820599787468956, "learning_rate": 6.7816041473514606e-06, "loss": 17.688, "step": 22021 }, { "epoch": 0.40254446414535616, "grad_norm": 6.747177678529493, "learning_rate": 6.781327561802645e-06, "loss": 17.5352, "step": 22022 }, { "epoch": 0.4025627433418027, "grad_norm": 5.259387240651166, "learning_rate": 6.781050970010433e-06, "loss": 17.1437, "step": 22023 }, { "epoch": 0.40258102253824923, "grad_norm": 6.830641097745921, "learning_rate": 6.780774371975794e-06, "loss": 17.8107, "step": 22024 }, { "epoch": 0.40259930173469577, "grad_norm": 6.203491879441497, "learning_rate": 6.780497767699692e-06, "loss": 17.7428, "step": 22025 }, { "epoch": 0.40261758093114225, "grad_norm": 7.405202023530535, "learning_rate": 6.780221157183101e-06, "loss": 17.8754, "step": 22026 }, { "epoch": 0.4026358601275888, "grad_norm": 6.102518656227493, "learning_rate": 6.779944540426988e-06, "loss": 17.5551, "step": 22027 }, { "epoch": 0.4026541393240353, "grad_norm": 6.421375108809336, "learning_rate": 6.7796679174323265e-06, "loss": 17.4135, "step": 22028 }, { "epoch": 0.40267241852048186, "grad_norm": 5.960106085602953, "learning_rate": 6.7793912882000815e-06, "loss": 17.4722, "step": 22029 }, { "epoch": 0.40269069771692834, "grad_norm": 7.350397311128247, "learning_rate": 6.779114652731224e-06, "loss": 17.739, "step": 22030 }, { "epoch": 0.40270897691337487, "grad_norm": 7.4286504026203595, "learning_rate": 6.778838011026726e-06, "loss": 18.2491, "step": 22031 }, { "epoch": 0.4027272561098214, "grad_norm": 6.304389322318602, "learning_rate": 6.778561363087555e-06, "loss": 17.5771, "step": 22032 }, { "epoch": 0.40274553530626794, "grad_norm": 6.152669886805366, "learning_rate": 6.778284708914679e-06, "loss": 17.4633, "step": 22033 }, { "epoch": 0.4027638145027145, "grad_norm": 6.005907317098, "learning_rate": 6.778008048509071e-06, "loss": 17.727, "step": 22034 }, { "epoch": 0.40278209369916096, "grad_norm": 6.711324495143864, "learning_rate": 6.7777313818716974e-06, "loss": 17.7124, "step": 22035 }, { "epoch": 0.4028003728956075, "grad_norm": 6.923061489646611, "learning_rate": 6.77745470900353e-06, "loss": 18.0881, "step": 22036 }, { "epoch": 0.40281865209205403, "grad_norm": 6.470315405504754, "learning_rate": 6.777178029905539e-06, "loss": 17.5918, "step": 22037 }, { "epoch": 0.40283693128850057, "grad_norm": 6.143189444036186, "learning_rate": 6.776901344578694e-06, "loss": 17.4019, "step": 22038 }, { "epoch": 0.4028552104849471, "grad_norm": 6.553847836721387, "learning_rate": 6.776624653023962e-06, "loss": 17.5724, "step": 22039 }, { "epoch": 0.4028734896813936, "grad_norm": 7.314977848349203, "learning_rate": 6.776347955242315e-06, "loss": 17.931, "step": 22040 }, { "epoch": 0.4028917688778401, "grad_norm": 7.257845640885772, "learning_rate": 6.776071251234724e-06, "loss": 18.0227, "step": 22041 }, { "epoch": 0.40291004807428665, "grad_norm": 7.347759081709609, "learning_rate": 6.7757945410021565e-06, "loss": 17.9248, "step": 22042 }, { "epoch": 0.4029283272707332, "grad_norm": 7.469395251740494, "learning_rate": 6.775517824545583e-06, "loss": 17.7553, "step": 22043 }, { "epoch": 0.4029466064671797, "grad_norm": 6.61346924708778, "learning_rate": 6.775241101865975e-06, "loss": 17.6877, "step": 22044 }, { "epoch": 0.4029648856636262, "grad_norm": 9.551444592270448, "learning_rate": 6.774964372964299e-06, "loss": 18.2672, "step": 22045 }, { "epoch": 0.40298316486007274, "grad_norm": 5.8401528513055885, "learning_rate": 6.7746876378415286e-06, "loss": 17.421, "step": 22046 }, { "epoch": 0.4030014440565193, "grad_norm": 6.567797173097257, "learning_rate": 6.77441089649863e-06, "loss": 17.3372, "step": 22047 }, { "epoch": 0.4030197232529658, "grad_norm": 6.330795420549813, "learning_rate": 6.774134148936578e-06, "loss": 17.6241, "step": 22048 }, { "epoch": 0.40303800244941235, "grad_norm": 4.981431999872036, "learning_rate": 6.773857395156337e-06, "loss": 16.9476, "step": 22049 }, { "epoch": 0.4030562816458588, "grad_norm": 7.565433370051487, "learning_rate": 6.7735806351588805e-06, "loss": 18.1421, "step": 22050 }, { "epoch": 0.40307456084230536, "grad_norm": 7.49806402746438, "learning_rate": 6.773303868945178e-06, "loss": 18.1754, "step": 22051 }, { "epoch": 0.4030928400387519, "grad_norm": 7.030163964983321, "learning_rate": 6.773027096516201e-06, "loss": 17.6668, "step": 22052 }, { "epoch": 0.40311111923519843, "grad_norm": 7.069973587782146, "learning_rate": 6.772750317872916e-06, "loss": 17.7277, "step": 22053 }, { "epoch": 0.40312939843164497, "grad_norm": 6.569088556625905, "learning_rate": 6.772473533016294e-06, "loss": 17.4102, "step": 22054 }, { "epoch": 0.40314767762809145, "grad_norm": 6.114960161641345, "learning_rate": 6.772196741947308e-06, "loss": 17.4188, "step": 22055 }, { "epoch": 0.403165956824538, "grad_norm": 6.1939415525572254, "learning_rate": 6.771919944666926e-06, "loss": 17.467, "step": 22056 }, { "epoch": 0.4031842360209845, "grad_norm": 6.43608017716079, "learning_rate": 6.771643141176118e-06, "loss": 17.6242, "step": 22057 }, { "epoch": 0.40320251521743106, "grad_norm": 6.264800078105397, "learning_rate": 6.771366331475854e-06, "loss": 17.4917, "step": 22058 }, { "epoch": 0.4032207944138776, "grad_norm": 6.575052766783038, "learning_rate": 6.771089515567105e-06, "loss": 17.8639, "step": 22059 }, { "epoch": 0.4032390736103241, "grad_norm": 7.931746975453965, "learning_rate": 6.770812693450841e-06, "loss": 18.1557, "step": 22060 }, { "epoch": 0.4032573528067706, "grad_norm": 4.861686580927656, "learning_rate": 6.770535865128033e-06, "loss": 16.9002, "step": 22061 }, { "epoch": 0.40327563200321714, "grad_norm": 6.135266402479794, "learning_rate": 6.7702590305996485e-06, "loss": 17.1294, "step": 22062 }, { "epoch": 0.4032939111996637, "grad_norm": 6.025703055757253, "learning_rate": 6.769982189866662e-06, "loss": 17.4299, "step": 22063 }, { "epoch": 0.40331219039611016, "grad_norm": 5.818448677965238, "learning_rate": 6.7697053429300395e-06, "loss": 17.1443, "step": 22064 }, { "epoch": 0.4033304695925567, "grad_norm": 6.500125203523118, "learning_rate": 6.769428489790755e-06, "loss": 17.5327, "step": 22065 }, { "epoch": 0.40334874878900323, "grad_norm": 6.401033609103858, "learning_rate": 6.7691516304497775e-06, "loss": 17.3931, "step": 22066 }, { "epoch": 0.40336702798544977, "grad_norm": 7.247341161607492, "learning_rate": 6.768874764908074e-06, "loss": 17.8451, "step": 22067 }, { "epoch": 0.4033853071818963, "grad_norm": 9.345615282623246, "learning_rate": 6.7685978931666204e-06, "loss": 18.7439, "step": 22068 }, { "epoch": 0.4034035863783428, "grad_norm": 6.891614580638836, "learning_rate": 6.768321015226385e-06, "loss": 17.4206, "step": 22069 }, { "epoch": 0.4034218655747893, "grad_norm": 4.931124718854705, "learning_rate": 6.768044131088337e-06, "loss": 17.0932, "step": 22070 }, { "epoch": 0.40344014477123585, "grad_norm": 6.823202517333984, "learning_rate": 6.767767240753448e-06, "loss": 17.6827, "step": 22071 }, { "epoch": 0.4034584239676824, "grad_norm": 5.133011928741689, "learning_rate": 6.767490344222687e-06, "loss": 16.9361, "step": 22072 }, { "epoch": 0.4034767031641289, "grad_norm": 6.51980297676806, "learning_rate": 6.767213441497028e-06, "loss": 17.5075, "step": 22073 }, { "epoch": 0.4034949823605754, "grad_norm": 6.3817933788838435, "learning_rate": 6.766936532577438e-06, "loss": 17.2291, "step": 22074 }, { "epoch": 0.40351326155702194, "grad_norm": 6.940379698871074, "learning_rate": 6.766659617464889e-06, "loss": 17.7432, "step": 22075 }, { "epoch": 0.4035315407534685, "grad_norm": 7.381287030269214, "learning_rate": 6.766382696160351e-06, "loss": 17.8526, "step": 22076 }, { "epoch": 0.403549819949915, "grad_norm": 7.3041508553010015, "learning_rate": 6.766105768664795e-06, "loss": 17.9687, "step": 22077 }, { "epoch": 0.40356809914636155, "grad_norm": 6.578276686090134, "learning_rate": 6.765828834979191e-06, "loss": 17.6986, "step": 22078 }, { "epoch": 0.403586378342808, "grad_norm": 6.286582031636894, "learning_rate": 6.765551895104512e-06, "loss": 17.4138, "step": 22079 }, { "epoch": 0.40360465753925456, "grad_norm": 6.793606781535274, "learning_rate": 6.765274949041726e-06, "loss": 17.7358, "step": 22080 }, { "epoch": 0.4036229367357011, "grad_norm": 8.42867156786083, "learning_rate": 6.764997996791803e-06, "loss": 17.849, "step": 22081 }, { "epoch": 0.40364121593214763, "grad_norm": 6.2925320194200465, "learning_rate": 6.764721038355716e-06, "loss": 17.3358, "step": 22082 }, { "epoch": 0.40365949512859417, "grad_norm": 6.380862232199877, "learning_rate": 6.764444073734436e-06, "loss": 17.5864, "step": 22083 }, { "epoch": 0.40367777432504065, "grad_norm": 5.93191903512372, "learning_rate": 6.764167102928932e-06, "loss": 17.2688, "step": 22084 }, { "epoch": 0.4036960535214872, "grad_norm": 6.916394760447434, "learning_rate": 6.7638901259401755e-06, "loss": 17.6485, "step": 22085 }, { "epoch": 0.4037143327179337, "grad_norm": 7.711118810005729, "learning_rate": 6.763613142769137e-06, "loss": 18.2313, "step": 22086 }, { "epoch": 0.40373261191438026, "grad_norm": 7.187299408147425, "learning_rate": 6.763336153416787e-06, "loss": 17.5883, "step": 22087 }, { "epoch": 0.4037508911108268, "grad_norm": 6.224547537223937, "learning_rate": 6.763059157884098e-06, "loss": 17.4435, "step": 22088 }, { "epoch": 0.4037691703072733, "grad_norm": 5.817792774023097, "learning_rate": 6.762782156172037e-06, "loss": 17.4436, "step": 22089 }, { "epoch": 0.4037874495037198, "grad_norm": 7.816523837537163, "learning_rate": 6.76250514828158e-06, "loss": 17.8937, "step": 22090 }, { "epoch": 0.40380572870016634, "grad_norm": 6.207199183236863, "learning_rate": 6.762228134213695e-06, "loss": 17.5285, "step": 22091 }, { "epoch": 0.4038240078966129, "grad_norm": 6.839655499113275, "learning_rate": 6.761951113969353e-06, "loss": 17.7867, "step": 22092 }, { "epoch": 0.4038422870930594, "grad_norm": 7.786225975050639, "learning_rate": 6.761674087549526e-06, "loss": 18.0969, "step": 22093 }, { "epoch": 0.4038605662895059, "grad_norm": 6.049391074637899, "learning_rate": 6.761397054955182e-06, "loss": 17.4082, "step": 22094 }, { "epoch": 0.40387884548595243, "grad_norm": 6.449817757699847, "learning_rate": 6.761120016187296e-06, "loss": 17.5026, "step": 22095 }, { "epoch": 0.40389712468239897, "grad_norm": 6.807746451894597, "learning_rate": 6.760842971246837e-06, "loss": 17.4275, "step": 22096 }, { "epoch": 0.4039154038788455, "grad_norm": 5.916084772017239, "learning_rate": 6.760565920134776e-06, "loss": 17.326, "step": 22097 }, { "epoch": 0.403933683075292, "grad_norm": 7.6902640515359915, "learning_rate": 6.760288862852085e-06, "loss": 17.8501, "step": 22098 }, { "epoch": 0.4039519622717385, "grad_norm": 7.595915409993594, "learning_rate": 6.760011799399732e-06, "loss": 18.0188, "step": 22099 }, { "epoch": 0.40397024146818505, "grad_norm": 5.399474979429258, "learning_rate": 6.759734729778693e-06, "loss": 17.0223, "step": 22100 }, { "epoch": 0.4039885206646316, "grad_norm": 6.034056826555642, "learning_rate": 6.759457653989936e-06, "loss": 17.1478, "step": 22101 }, { "epoch": 0.4040067998610781, "grad_norm": 5.237464031625341, "learning_rate": 6.759180572034432e-06, "loss": 17.1195, "step": 22102 }, { "epoch": 0.4040250790575246, "grad_norm": 6.949943876542296, "learning_rate": 6.758903483913152e-06, "loss": 17.529, "step": 22103 }, { "epoch": 0.40404335825397114, "grad_norm": 5.775795031745005, "learning_rate": 6.758626389627068e-06, "loss": 17.3392, "step": 22104 }, { "epoch": 0.4040616374504177, "grad_norm": 6.04868043272047, "learning_rate": 6.7583492891771516e-06, "loss": 17.0395, "step": 22105 }, { "epoch": 0.4040799166468642, "grad_norm": 6.370321786337064, "learning_rate": 6.758072182564374e-06, "loss": 17.5069, "step": 22106 }, { "epoch": 0.40409819584331075, "grad_norm": 6.813322830272538, "learning_rate": 6.757795069789706e-06, "loss": 17.2654, "step": 22107 }, { "epoch": 0.40411647503975723, "grad_norm": 6.144323791285378, "learning_rate": 6.757517950854118e-06, "loss": 17.619, "step": 22108 }, { "epoch": 0.40413475423620376, "grad_norm": 5.941140413558077, "learning_rate": 6.757240825758582e-06, "loss": 17.5066, "step": 22109 }, { "epoch": 0.4041530334326503, "grad_norm": 6.320242546936558, "learning_rate": 6.756963694504071e-06, "loss": 17.3604, "step": 22110 }, { "epoch": 0.40417131262909683, "grad_norm": 6.601216207740825, "learning_rate": 6.756686557091554e-06, "loss": 17.4847, "step": 22111 }, { "epoch": 0.40418959182554337, "grad_norm": 6.657458883317337, "learning_rate": 6.756409413522002e-06, "loss": 17.4861, "step": 22112 }, { "epoch": 0.40420787102198985, "grad_norm": 7.012513906515419, "learning_rate": 6.7561322637963865e-06, "loss": 17.7998, "step": 22113 }, { "epoch": 0.4042261502184364, "grad_norm": 5.949843811360443, "learning_rate": 6.755855107915683e-06, "loss": 17.0085, "step": 22114 }, { "epoch": 0.4042444294148829, "grad_norm": 5.812895215797421, "learning_rate": 6.755577945880858e-06, "loss": 17.3518, "step": 22115 }, { "epoch": 0.40426270861132946, "grad_norm": 5.973497965673985, "learning_rate": 6.755300777692885e-06, "loss": 17.3914, "step": 22116 }, { "epoch": 0.404280987807776, "grad_norm": 5.411971787295952, "learning_rate": 6.755023603352735e-06, "loss": 17.1019, "step": 22117 }, { "epoch": 0.4042992670042225, "grad_norm": 5.909867899448887, "learning_rate": 6.754746422861379e-06, "loss": 17.1723, "step": 22118 }, { "epoch": 0.404317546200669, "grad_norm": 7.245289313835801, "learning_rate": 6.754469236219789e-06, "loss": 17.984, "step": 22119 }, { "epoch": 0.40433582539711554, "grad_norm": 6.789279055749452, "learning_rate": 6.754192043428938e-06, "loss": 17.5433, "step": 22120 }, { "epoch": 0.4043541045935621, "grad_norm": 5.120970386248282, "learning_rate": 6.753914844489795e-06, "loss": 16.965, "step": 22121 }, { "epoch": 0.4043723837900086, "grad_norm": 6.566986376265424, "learning_rate": 6.753637639403332e-06, "loss": 17.2193, "step": 22122 }, { "epoch": 0.4043906629864551, "grad_norm": 6.546714540317617, "learning_rate": 6.753360428170523e-06, "loss": 17.2887, "step": 22123 }, { "epoch": 0.40440894218290163, "grad_norm": 8.074619644270074, "learning_rate": 6.753083210792337e-06, "loss": 17.7437, "step": 22124 }, { "epoch": 0.40442722137934817, "grad_norm": 6.365788857589704, "learning_rate": 6.752805987269746e-06, "loss": 17.5323, "step": 22125 }, { "epoch": 0.4044455005757947, "grad_norm": 5.925189280272979, "learning_rate": 6.752528757603722e-06, "loss": 17.4087, "step": 22126 }, { "epoch": 0.40446377977224124, "grad_norm": 7.873084083845511, "learning_rate": 6.752251521795236e-06, "loss": 18.2124, "step": 22127 }, { "epoch": 0.4044820589686877, "grad_norm": 6.146300131736766, "learning_rate": 6.751974279845264e-06, "loss": 17.4707, "step": 22128 }, { "epoch": 0.40450033816513425, "grad_norm": 5.7523359568742265, "learning_rate": 6.751697031754772e-06, "loss": 17.4216, "step": 22129 }, { "epoch": 0.4045186173615808, "grad_norm": 6.698152489926536, "learning_rate": 6.751419777524734e-06, "loss": 17.41, "step": 22130 }, { "epoch": 0.4045368965580273, "grad_norm": 6.921879800943967, "learning_rate": 6.7511425171561205e-06, "loss": 17.8194, "step": 22131 }, { "epoch": 0.4045551757544738, "grad_norm": 6.662011338095347, "learning_rate": 6.750865250649906e-06, "loss": 17.5918, "step": 22132 }, { "epoch": 0.40457345495092034, "grad_norm": 6.260882181315775, "learning_rate": 6.75058797800706e-06, "loss": 17.3202, "step": 22133 }, { "epoch": 0.4045917341473669, "grad_norm": 5.924199633577894, "learning_rate": 6.750310699228555e-06, "loss": 17.4094, "step": 22134 }, { "epoch": 0.4046100133438134, "grad_norm": 6.184279968253344, "learning_rate": 6.750033414315363e-06, "loss": 17.2393, "step": 22135 }, { "epoch": 0.40462829254025995, "grad_norm": 9.379302029088581, "learning_rate": 6.749756123268456e-06, "loss": 18.2848, "step": 22136 }, { "epoch": 0.40464657173670643, "grad_norm": 6.544399335645791, "learning_rate": 6.749478826088806e-06, "loss": 17.6162, "step": 22137 }, { "epoch": 0.40466485093315296, "grad_norm": 5.786205780710364, "learning_rate": 6.749201522777385e-06, "loss": 17.2665, "step": 22138 }, { "epoch": 0.4046831301295995, "grad_norm": 8.181619392120167, "learning_rate": 6.748924213335163e-06, "loss": 18.2582, "step": 22139 }, { "epoch": 0.40470140932604604, "grad_norm": 6.457207279081686, "learning_rate": 6.7486468977631126e-06, "loss": 17.1033, "step": 22140 }, { "epoch": 0.40471968852249257, "grad_norm": 7.855967774278722, "learning_rate": 6.748369576062208e-06, "loss": 18.189, "step": 22141 }, { "epoch": 0.40473796771893905, "grad_norm": 4.969703936150911, "learning_rate": 6.74809224823342e-06, "loss": 16.9735, "step": 22142 }, { "epoch": 0.4047562469153856, "grad_norm": 7.140640634446646, "learning_rate": 6.74781491427772e-06, "loss": 17.6187, "step": 22143 }, { "epoch": 0.4047745261118321, "grad_norm": 6.037265655891585, "learning_rate": 6.74753757419608e-06, "loss": 17.2544, "step": 22144 }, { "epoch": 0.40479280530827866, "grad_norm": 7.297147796514037, "learning_rate": 6.747260227989473e-06, "loss": 17.9993, "step": 22145 }, { "epoch": 0.4048110845047252, "grad_norm": 9.024807169459095, "learning_rate": 6.7469828756588694e-06, "loss": 18.2871, "step": 22146 }, { "epoch": 0.4048293637011717, "grad_norm": 5.0270602565412945, "learning_rate": 6.746705517205244e-06, "loss": 16.9534, "step": 22147 }, { "epoch": 0.4048476428976182, "grad_norm": 5.270923813606672, "learning_rate": 6.746428152629567e-06, "loss": 16.99, "step": 22148 }, { "epoch": 0.40486592209406475, "grad_norm": 6.167460161072971, "learning_rate": 6.746150781932809e-06, "loss": 17.4296, "step": 22149 }, { "epoch": 0.4048842012905113, "grad_norm": 7.3482352376413695, "learning_rate": 6.745873405115946e-06, "loss": 18.2224, "step": 22150 }, { "epoch": 0.4049024804869578, "grad_norm": 6.671407230482707, "learning_rate": 6.7455960221799475e-06, "loss": 17.0869, "step": 22151 }, { "epoch": 0.4049207596834043, "grad_norm": 7.619510605552885, "learning_rate": 6.745318633125788e-06, "loss": 17.9083, "step": 22152 }, { "epoch": 0.40493903887985083, "grad_norm": 7.018422401100924, "learning_rate": 6.745041237954437e-06, "loss": 17.8694, "step": 22153 }, { "epoch": 0.40495731807629737, "grad_norm": 5.185636416302049, "learning_rate": 6.744763836666866e-06, "loss": 16.768, "step": 22154 }, { "epoch": 0.4049755972727439, "grad_norm": 5.460020134601508, "learning_rate": 6.7444864292640525e-06, "loss": 17.1994, "step": 22155 }, { "epoch": 0.40499387646919044, "grad_norm": 5.8555374893900485, "learning_rate": 6.744209015746963e-06, "loss": 17.2155, "step": 22156 }, { "epoch": 0.4050121556656369, "grad_norm": 5.443443031534633, "learning_rate": 6.743931596116573e-06, "loss": 17.1019, "step": 22157 }, { "epoch": 0.40503043486208345, "grad_norm": 6.232530723447787, "learning_rate": 6.743654170373855e-06, "loss": 17.3289, "step": 22158 }, { "epoch": 0.40504871405853, "grad_norm": 6.257351557302948, "learning_rate": 6.743376738519779e-06, "loss": 17.3444, "step": 22159 }, { "epoch": 0.4050669932549765, "grad_norm": 6.7702966690245034, "learning_rate": 6.743099300555319e-06, "loss": 17.714, "step": 22160 }, { "epoch": 0.40508527245142306, "grad_norm": 7.751807796514337, "learning_rate": 6.742821856481448e-06, "loss": 17.7319, "step": 22161 }, { "epoch": 0.40510355164786954, "grad_norm": 6.509454448143292, "learning_rate": 6.742544406299137e-06, "loss": 17.506, "step": 22162 }, { "epoch": 0.4051218308443161, "grad_norm": 7.400758878878089, "learning_rate": 6.742266950009359e-06, "loss": 17.9424, "step": 22163 }, { "epoch": 0.4051401100407626, "grad_norm": 5.882275896544902, "learning_rate": 6.741989487613087e-06, "loss": 17.4201, "step": 22164 }, { "epoch": 0.40515838923720915, "grad_norm": 7.6409091347877975, "learning_rate": 6.741712019111293e-06, "loss": 17.9477, "step": 22165 }, { "epoch": 0.40517666843365563, "grad_norm": 6.4758461795965205, "learning_rate": 6.74143454450495e-06, "loss": 17.6987, "step": 22166 }, { "epoch": 0.40519494763010216, "grad_norm": 6.2413095213566905, "learning_rate": 6.741157063795028e-06, "loss": 17.3144, "step": 22167 }, { "epoch": 0.4052132268265487, "grad_norm": 6.9565224842103675, "learning_rate": 6.740879576982505e-06, "loss": 17.714, "step": 22168 }, { "epoch": 0.40523150602299524, "grad_norm": 6.0106981750372634, "learning_rate": 6.740602084068349e-06, "loss": 17.2819, "step": 22169 }, { "epoch": 0.40524978521944177, "grad_norm": 5.607878065581783, "learning_rate": 6.740324585053532e-06, "loss": 16.992, "step": 22170 }, { "epoch": 0.40526806441588825, "grad_norm": 6.215494327541819, "learning_rate": 6.740047079939028e-06, "loss": 17.2968, "step": 22171 }, { "epoch": 0.4052863436123348, "grad_norm": 6.622060494217935, "learning_rate": 6.7397695687258115e-06, "loss": 17.5319, "step": 22172 }, { "epoch": 0.4053046228087813, "grad_norm": 6.28169817228459, "learning_rate": 6.7394920514148535e-06, "loss": 17.4997, "step": 22173 }, { "epoch": 0.40532290200522786, "grad_norm": 6.7644607074438605, "learning_rate": 6.739214528007126e-06, "loss": 17.8697, "step": 22174 }, { "epoch": 0.4053411812016744, "grad_norm": 5.951257001626463, "learning_rate": 6.738936998503603e-06, "loss": 17.2977, "step": 22175 }, { "epoch": 0.4053594603981209, "grad_norm": 5.930150434365645, "learning_rate": 6.738659462905257e-06, "loss": 17.2478, "step": 22176 }, { "epoch": 0.4053777395945674, "grad_norm": 5.6784804525390715, "learning_rate": 6.738381921213061e-06, "loss": 17.4433, "step": 22177 }, { "epoch": 0.40539601879101395, "grad_norm": 6.64308669008464, "learning_rate": 6.738104373427986e-06, "loss": 17.3754, "step": 22178 }, { "epoch": 0.4054142979874605, "grad_norm": 5.741868228596022, "learning_rate": 6.737826819551008e-06, "loss": 17.1142, "step": 22179 }, { "epoch": 0.405432577183907, "grad_norm": 5.997290038820609, "learning_rate": 6.737549259583096e-06, "loss": 17.3825, "step": 22180 }, { "epoch": 0.4054508563803535, "grad_norm": 5.844866164401639, "learning_rate": 6.7372716935252235e-06, "loss": 17.267, "step": 22181 }, { "epoch": 0.40546913557680003, "grad_norm": 6.673007568731624, "learning_rate": 6.7369941213783664e-06, "loss": 17.7447, "step": 22182 }, { "epoch": 0.40548741477324657, "grad_norm": 6.79963815163332, "learning_rate": 6.736716543143496e-06, "loss": 17.7124, "step": 22183 }, { "epoch": 0.4055056939696931, "grad_norm": 6.457810547830599, "learning_rate": 6.736438958821584e-06, "loss": 17.3048, "step": 22184 }, { "epoch": 0.40552397316613964, "grad_norm": 6.999599851720819, "learning_rate": 6.736161368413604e-06, "loss": 17.8615, "step": 22185 }, { "epoch": 0.4055422523625861, "grad_norm": 5.936760149888992, "learning_rate": 6.735883771920528e-06, "loss": 17.3027, "step": 22186 }, { "epoch": 0.40556053155903266, "grad_norm": 6.800880808213196, "learning_rate": 6.7356061693433314e-06, "loss": 17.8916, "step": 22187 }, { "epoch": 0.4055788107554792, "grad_norm": 6.662290234587748, "learning_rate": 6.7353285606829855e-06, "loss": 17.6892, "step": 22188 }, { "epoch": 0.4055970899519257, "grad_norm": 6.0424886929919355, "learning_rate": 6.7350509459404644e-06, "loss": 17.3845, "step": 22189 }, { "epoch": 0.40561536914837226, "grad_norm": 8.329936546732132, "learning_rate": 6.734773325116739e-06, "loss": 18.0189, "step": 22190 }, { "epoch": 0.40563364834481874, "grad_norm": 6.792306073485588, "learning_rate": 6.734495698212784e-06, "loss": 17.8954, "step": 22191 }, { "epoch": 0.4056519275412653, "grad_norm": 5.311830982957135, "learning_rate": 6.734218065229572e-06, "loss": 17.1164, "step": 22192 }, { "epoch": 0.4056702067377118, "grad_norm": 6.775960255611034, "learning_rate": 6.7339404261680775e-06, "loss": 17.197, "step": 22193 }, { "epoch": 0.40568848593415835, "grad_norm": 7.348697310052713, "learning_rate": 6.733662781029271e-06, "loss": 17.8498, "step": 22194 }, { "epoch": 0.4057067651306049, "grad_norm": 8.793370791530828, "learning_rate": 6.733385129814126e-06, "loss": 17.9595, "step": 22195 }, { "epoch": 0.40572504432705137, "grad_norm": 7.5304583369701, "learning_rate": 6.733107472523618e-06, "loss": 17.9314, "step": 22196 }, { "epoch": 0.4057433235234979, "grad_norm": 7.494023003913464, "learning_rate": 6.732829809158719e-06, "loss": 17.8105, "step": 22197 }, { "epoch": 0.40576160271994444, "grad_norm": 6.0353004471521405, "learning_rate": 6.7325521397204005e-06, "loss": 17.3592, "step": 22198 }, { "epoch": 0.40577988191639097, "grad_norm": 6.462822894306157, "learning_rate": 6.732274464209637e-06, "loss": 17.5024, "step": 22199 }, { "epoch": 0.40579816111283745, "grad_norm": 7.389732610475244, "learning_rate": 6.731996782627404e-06, "loss": 17.8692, "step": 22200 }, { "epoch": 0.405816440309284, "grad_norm": 7.098776795984079, "learning_rate": 6.731719094974671e-06, "loss": 17.963, "step": 22201 }, { "epoch": 0.4058347195057305, "grad_norm": 7.033700437195739, "learning_rate": 6.7314414012524135e-06, "loss": 17.4693, "step": 22202 }, { "epoch": 0.40585299870217706, "grad_norm": 5.605984450205299, "learning_rate": 6.731163701461603e-06, "loss": 17.1704, "step": 22203 }, { "epoch": 0.4058712778986236, "grad_norm": 6.494532560138076, "learning_rate": 6.730885995603215e-06, "loss": 17.3485, "step": 22204 }, { "epoch": 0.4058895570950701, "grad_norm": 6.795673206135465, "learning_rate": 6.730608283678222e-06, "loss": 17.7579, "step": 22205 }, { "epoch": 0.4059078362915166, "grad_norm": 6.104057564248977, "learning_rate": 6.730330565687596e-06, "loss": 17.2064, "step": 22206 }, { "epoch": 0.40592611548796315, "grad_norm": 7.248129920360663, "learning_rate": 6.730052841632313e-06, "loss": 17.7974, "step": 22207 }, { "epoch": 0.4059443946844097, "grad_norm": 7.279123128963967, "learning_rate": 6.729775111513342e-06, "loss": 17.781, "step": 22208 }, { "epoch": 0.4059626738808562, "grad_norm": 6.649230320363981, "learning_rate": 6.729497375331662e-06, "loss": 17.9351, "step": 22209 }, { "epoch": 0.4059809530773027, "grad_norm": 5.495569677274187, "learning_rate": 6.729219633088244e-06, "loss": 17.0028, "step": 22210 }, { "epoch": 0.40599923227374923, "grad_norm": 6.623289284082819, "learning_rate": 6.72894188478406e-06, "loss": 17.7299, "step": 22211 }, { "epoch": 0.40601751147019577, "grad_norm": 7.003139447383208, "learning_rate": 6.728664130420085e-06, "loss": 17.71, "step": 22212 }, { "epoch": 0.4060357906666423, "grad_norm": 5.921916466668357, "learning_rate": 6.728386369997292e-06, "loss": 17.101, "step": 22213 }, { "epoch": 0.40605406986308884, "grad_norm": 5.181627189879955, "learning_rate": 6.728108603516655e-06, "loss": 17.0198, "step": 22214 }, { "epoch": 0.4060723490595353, "grad_norm": 8.467760984784013, "learning_rate": 6.727830830979148e-06, "loss": 18.6448, "step": 22215 }, { "epoch": 0.40609062825598186, "grad_norm": 6.226974569968773, "learning_rate": 6.727553052385742e-06, "loss": 17.2704, "step": 22216 }, { "epoch": 0.4061089074524284, "grad_norm": 7.620899612529509, "learning_rate": 6.727275267737414e-06, "loss": 17.6003, "step": 22217 }, { "epoch": 0.4061271866488749, "grad_norm": 5.428206503269267, "learning_rate": 6.726997477035137e-06, "loss": 17.1095, "step": 22218 }, { "epoch": 0.40614546584532146, "grad_norm": 6.234512034152487, "learning_rate": 6.7267196802798814e-06, "loss": 17.4033, "step": 22219 }, { "epoch": 0.40616374504176794, "grad_norm": 5.91543801171165, "learning_rate": 6.726441877472625e-06, "loss": 17.2918, "step": 22220 }, { "epoch": 0.4061820242382145, "grad_norm": 5.962861017427411, "learning_rate": 6.726164068614338e-06, "loss": 17.1846, "step": 22221 }, { "epoch": 0.406200303434661, "grad_norm": 5.507652938256123, "learning_rate": 6.725886253705996e-06, "loss": 17.1938, "step": 22222 }, { "epoch": 0.40621858263110755, "grad_norm": 6.847664016963516, "learning_rate": 6.7256084327485735e-06, "loss": 17.7769, "step": 22223 }, { "epoch": 0.4062368618275541, "grad_norm": 6.7659342386056025, "learning_rate": 6.725330605743043e-06, "loss": 17.3308, "step": 22224 }, { "epoch": 0.40625514102400057, "grad_norm": 7.855577634776607, "learning_rate": 6.725052772690379e-06, "loss": 17.9193, "step": 22225 }, { "epoch": 0.4062734202204471, "grad_norm": 10.47053549414686, "learning_rate": 6.7247749335915526e-06, "loss": 18.4262, "step": 22226 }, { "epoch": 0.40629169941689364, "grad_norm": 6.279469691492723, "learning_rate": 6.724497088447541e-06, "loss": 17.613, "step": 22227 }, { "epoch": 0.4063099786133402, "grad_norm": 6.6329844426406765, "learning_rate": 6.724219237259318e-06, "loss": 17.5052, "step": 22228 }, { "epoch": 0.4063282578097867, "grad_norm": 5.967539116521782, "learning_rate": 6.723941380027854e-06, "loss": 17.4117, "step": 22229 }, { "epoch": 0.4063465370062332, "grad_norm": 6.275821818703162, "learning_rate": 6.723663516754126e-06, "loss": 17.66, "step": 22230 }, { "epoch": 0.4063648162026797, "grad_norm": 6.574200769598269, "learning_rate": 6.723385647439108e-06, "loss": 17.7173, "step": 22231 }, { "epoch": 0.40638309539912626, "grad_norm": 8.875882950778484, "learning_rate": 6.7231077720837714e-06, "loss": 18.4791, "step": 22232 }, { "epoch": 0.4064013745955728, "grad_norm": 7.896599020986848, "learning_rate": 6.722829890689092e-06, "loss": 18.1513, "step": 22233 }, { "epoch": 0.4064196537920193, "grad_norm": 6.937869112786421, "learning_rate": 6.722552003256043e-06, "loss": 17.5613, "step": 22234 }, { "epoch": 0.4064379329884658, "grad_norm": 8.10140539035616, "learning_rate": 6.722274109785599e-06, "loss": 18.0932, "step": 22235 }, { "epoch": 0.40645621218491235, "grad_norm": 5.71491763157779, "learning_rate": 6.721996210278734e-06, "loss": 17.2551, "step": 22236 }, { "epoch": 0.4064744913813589, "grad_norm": 5.936790365388158, "learning_rate": 6.721718304736421e-06, "loss": 17.4498, "step": 22237 }, { "epoch": 0.4064927705778054, "grad_norm": 6.032687222202416, "learning_rate": 6.721440393159636e-06, "loss": 17.2518, "step": 22238 }, { "epoch": 0.4065110497742519, "grad_norm": 6.356822217101505, "learning_rate": 6.721162475549351e-06, "loss": 17.4824, "step": 22239 }, { "epoch": 0.40652932897069843, "grad_norm": 6.64672910711208, "learning_rate": 6.72088455190654e-06, "loss": 17.798, "step": 22240 }, { "epoch": 0.40654760816714497, "grad_norm": 6.381776877968177, "learning_rate": 6.720606622232179e-06, "loss": 17.3612, "step": 22241 }, { "epoch": 0.4065658873635915, "grad_norm": 6.884852765220903, "learning_rate": 6.720328686527242e-06, "loss": 17.6881, "step": 22242 }, { "epoch": 0.40658416656003804, "grad_norm": 6.519872071151057, "learning_rate": 6.720050744792701e-06, "loss": 17.3681, "step": 22243 }, { "epoch": 0.4066024457564845, "grad_norm": 6.4910420545005465, "learning_rate": 6.719772797029531e-06, "loss": 17.542, "step": 22244 }, { "epoch": 0.40662072495293106, "grad_norm": 6.679339522996079, "learning_rate": 6.719494843238707e-06, "loss": 17.6046, "step": 22245 }, { "epoch": 0.4066390041493776, "grad_norm": 6.380391764448618, "learning_rate": 6.7192168834212036e-06, "loss": 17.5827, "step": 22246 }, { "epoch": 0.4066572833458241, "grad_norm": 8.933926730953658, "learning_rate": 6.718938917577993e-06, "loss": 18.9389, "step": 22247 }, { "epoch": 0.40667556254227066, "grad_norm": 6.058289307292574, "learning_rate": 6.718660945710052e-06, "loss": 17.395, "step": 22248 }, { "epoch": 0.40669384173871714, "grad_norm": 6.329572252191765, "learning_rate": 6.718382967818352e-06, "loss": 17.6975, "step": 22249 }, { "epoch": 0.4067121209351637, "grad_norm": 7.076438856369958, "learning_rate": 6.718104983903869e-06, "loss": 17.8666, "step": 22250 }, { "epoch": 0.4067304001316102, "grad_norm": 6.23451601461986, "learning_rate": 6.717826993967578e-06, "loss": 17.6653, "step": 22251 }, { "epoch": 0.40674867932805675, "grad_norm": 6.995557174581337, "learning_rate": 6.717548998010454e-06, "loss": 17.8749, "step": 22252 }, { "epoch": 0.4067669585245033, "grad_norm": 7.147895900000048, "learning_rate": 6.717270996033467e-06, "loss": 17.6818, "step": 22253 }, { "epoch": 0.40678523772094977, "grad_norm": 5.875860858663323, "learning_rate": 6.716992988037594e-06, "loss": 17.5146, "step": 22254 }, { "epoch": 0.4068035169173963, "grad_norm": 7.237372281884785, "learning_rate": 6.7167149740238125e-06, "loss": 17.839, "step": 22255 }, { "epoch": 0.40682179611384284, "grad_norm": 7.264890108518054, "learning_rate": 6.716436953993092e-06, "loss": 17.7052, "step": 22256 }, { "epoch": 0.4068400753102894, "grad_norm": 6.724171135001746, "learning_rate": 6.716158927946408e-06, "loss": 17.6778, "step": 22257 }, { "epoch": 0.4068583545067359, "grad_norm": 5.578468251230984, "learning_rate": 6.715880895884738e-06, "loss": 17.4131, "step": 22258 }, { "epoch": 0.4068766337031824, "grad_norm": 6.560338080352419, "learning_rate": 6.715602857809052e-06, "loss": 17.6146, "step": 22259 }, { "epoch": 0.4068949128996289, "grad_norm": 6.9180291916264975, "learning_rate": 6.715324813720329e-06, "loss": 17.9467, "step": 22260 }, { "epoch": 0.40691319209607546, "grad_norm": 5.622453941909864, "learning_rate": 6.715046763619541e-06, "loss": 17.2836, "step": 22261 }, { "epoch": 0.406931471292522, "grad_norm": 8.176080449579668, "learning_rate": 6.714768707507662e-06, "loss": 18.2418, "step": 22262 }, { "epoch": 0.40694975048896853, "grad_norm": 6.274976231881443, "learning_rate": 6.714490645385667e-06, "loss": 17.7055, "step": 22263 }, { "epoch": 0.406968029685415, "grad_norm": 7.577819589305502, "learning_rate": 6.714212577254533e-06, "loss": 18.304, "step": 22264 }, { "epoch": 0.40698630888186155, "grad_norm": 6.573095639871201, "learning_rate": 6.713934503115232e-06, "loss": 17.6393, "step": 22265 }, { "epoch": 0.4070045880783081, "grad_norm": 6.295120292715528, "learning_rate": 6.713656422968739e-06, "loss": 17.264, "step": 22266 }, { "epoch": 0.4070228672747546, "grad_norm": 6.889377350472357, "learning_rate": 6.7133783368160275e-06, "loss": 17.6572, "step": 22267 }, { "epoch": 0.4070411464712011, "grad_norm": 6.40510907772885, "learning_rate": 6.713100244658075e-06, "loss": 17.3039, "step": 22268 }, { "epoch": 0.40705942566764763, "grad_norm": 7.383304609218675, "learning_rate": 6.7128221464958565e-06, "loss": 18.0031, "step": 22269 }, { "epoch": 0.40707770486409417, "grad_norm": 6.5984351382322215, "learning_rate": 6.712544042330342e-06, "loss": 17.3374, "step": 22270 }, { "epoch": 0.4070959840605407, "grad_norm": 6.941330714665212, "learning_rate": 6.7122659321625115e-06, "loss": 17.5744, "step": 22271 }, { "epoch": 0.40711426325698724, "grad_norm": 5.798958432628722, "learning_rate": 6.711987815993335e-06, "loss": 17.1436, "step": 22272 }, { "epoch": 0.4071325424534337, "grad_norm": 7.03506626909677, "learning_rate": 6.711709693823793e-06, "loss": 17.9521, "step": 22273 }, { "epoch": 0.40715082164988026, "grad_norm": 6.350586760113581, "learning_rate": 6.7114315656548554e-06, "loss": 17.3257, "step": 22274 }, { "epoch": 0.4071691008463268, "grad_norm": 5.637832901288479, "learning_rate": 6.711153431487498e-06, "loss": 17.2569, "step": 22275 }, { "epoch": 0.40718738004277333, "grad_norm": 5.1169365619830165, "learning_rate": 6.710875291322697e-06, "loss": 16.9187, "step": 22276 }, { "epoch": 0.40720565923921986, "grad_norm": 6.370177226031568, "learning_rate": 6.710597145161427e-06, "loss": 17.438, "step": 22277 }, { "epoch": 0.40722393843566634, "grad_norm": 5.952175061029821, "learning_rate": 6.710318993004662e-06, "loss": 17.1577, "step": 22278 }, { "epoch": 0.4072422176321129, "grad_norm": 6.831569909064983, "learning_rate": 6.710040834853377e-06, "loss": 17.7127, "step": 22279 }, { "epoch": 0.4072604968285594, "grad_norm": 6.590500632536857, "learning_rate": 6.709762670708548e-06, "loss": 17.4051, "step": 22280 }, { "epoch": 0.40727877602500595, "grad_norm": 7.026313355604036, "learning_rate": 6.709484500571148e-06, "loss": 17.7467, "step": 22281 }, { "epoch": 0.4072970552214525, "grad_norm": 5.977094423814589, "learning_rate": 6.709206324442154e-06, "loss": 17.2317, "step": 22282 }, { "epoch": 0.40731533441789897, "grad_norm": 5.483432737920128, "learning_rate": 6.708928142322542e-06, "loss": 17.2138, "step": 22283 }, { "epoch": 0.4073336136143455, "grad_norm": 6.767125319549047, "learning_rate": 6.708649954213282e-06, "loss": 17.7343, "step": 22284 }, { "epoch": 0.40735189281079204, "grad_norm": 6.409230829732946, "learning_rate": 6.708371760115354e-06, "loss": 17.5576, "step": 22285 }, { "epoch": 0.4073701720072386, "grad_norm": 6.469226127075871, "learning_rate": 6.7080935600297306e-06, "loss": 17.5401, "step": 22286 }, { "epoch": 0.4073884512036851, "grad_norm": 6.399617496700894, "learning_rate": 6.7078153539573874e-06, "loss": 17.5286, "step": 22287 }, { "epoch": 0.4074067304001316, "grad_norm": 6.639113903558887, "learning_rate": 6.7075371418993e-06, "loss": 17.3282, "step": 22288 }, { "epoch": 0.4074250095965781, "grad_norm": 6.1714005240139596, "learning_rate": 6.707258923856442e-06, "loss": 17.3113, "step": 22289 }, { "epoch": 0.40744328879302466, "grad_norm": 8.751944703567593, "learning_rate": 6.706980699829791e-06, "loss": 18.5986, "step": 22290 }, { "epoch": 0.4074615679894712, "grad_norm": 8.630337043098338, "learning_rate": 6.70670246982032e-06, "loss": 18.6933, "step": 22291 }, { "epoch": 0.40747984718591773, "grad_norm": 6.414471102860324, "learning_rate": 6.7064242338290055e-06, "loss": 17.7053, "step": 22292 }, { "epoch": 0.4074981263823642, "grad_norm": 5.758931406556247, "learning_rate": 6.706145991856823e-06, "loss": 17.2899, "step": 22293 }, { "epoch": 0.40751640557881075, "grad_norm": 5.454292115298666, "learning_rate": 6.705867743904744e-06, "loss": 17.2698, "step": 22294 }, { "epoch": 0.4075346847752573, "grad_norm": 6.140112711077134, "learning_rate": 6.705589489973748e-06, "loss": 17.2706, "step": 22295 }, { "epoch": 0.4075529639717038, "grad_norm": 6.569895549115014, "learning_rate": 6.705311230064809e-06, "loss": 17.3748, "step": 22296 }, { "epoch": 0.40757124316815035, "grad_norm": 5.985081258725076, "learning_rate": 6.705032964178903e-06, "loss": 17.5517, "step": 22297 }, { "epoch": 0.40758952236459683, "grad_norm": 8.285743999725959, "learning_rate": 6.704754692317004e-06, "loss": 18.3401, "step": 22298 }, { "epoch": 0.40760780156104337, "grad_norm": 6.897872490167506, "learning_rate": 6.7044764144800865e-06, "loss": 17.6131, "step": 22299 }, { "epoch": 0.4076260807574899, "grad_norm": 6.236834655809541, "learning_rate": 6.704198130669128e-06, "loss": 17.6524, "step": 22300 }, { "epoch": 0.40764435995393644, "grad_norm": 6.979269261389349, "learning_rate": 6.703919840885104e-06, "loss": 17.826, "step": 22301 }, { "epoch": 0.4076626391503829, "grad_norm": 7.847914823017599, "learning_rate": 6.703641545128987e-06, "loss": 17.8131, "step": 22302 }, { "epoch": 0.40768091834682946, "grad_norm": 7.6746039941179545, "learning_rate": 6.703363243401755e-06, "loss": 18.1945, "step": 22303 }, { "epoch": 0.407699197543276, "grad_norm": 7.115684433826195, "learning_rate": 6.703084935704383e-06, "loss": 18.0043, "step": 22304 }, { "epoch": 0.40771747673972253, "grad_norm": 5.602538289522343, "learning_rate": 6.7028066220378455e-06, "loss": 17.1549, "step": 22305 }, { "epoch": 0.40773575593616906, "grad_norm": 5.5303830787105435, "learning_rate": 6.702528302403118e-06, "loss": 17.3422, "step": 22306 }, { "epoch": 0.40775403513261554, "grad_norm": 6.849153816525019, "learning_rate": 6.702249976801179e-06, "loss": 17.395, "step": 22307 }, { "epoch": 0.4077723143290621, "grad_norm": 6.642799615662462, "learning_rate": 6.701971645232998e-06, "loss": 17.6234, "step": 22308 }, { "epoch": 0.4077905935255086, "grad_norm": 7.963889722521995, "learning_rate": 6.701693307699556e-06, "loss": 18.139, "step": 22309 }, { "epoch": 0.40780887272195515, "grad_norm": 5.621074242739525, "learning_rate": 6.701414964201828e-06, "loss": 17.2245, "step": 22310 }, { "epoch": 0.4078271519184017, "grad_norm": 5.369137339102429, "learning_rate": 6.701136614740786e-06, "loss": 17.2033, "step": 22311 }, { "epoch": 0.40784543111484817, "grad_norm": 6.241584735197689, "learning_rate": 6.700858259317409e-06, "loss": 17.3039, "step": 22312 }, { "epoch": 0.4078637103112947, "grad_norm": 6.024766009967673, "learning_rate": 6.700579897932669e-06, "loss": 17.3971, "step": 22313 }, { "epoch": 0.40788198950774124, "grad_norm": 5.472173140584315, "learning_rate": 6.700301530587547e-06, "loss": 16.9745, "step": 22314 }, { "epoch": 0.4079002687041878, "grad_norm": 6.249329870649448, "learning_rate": 6.700023157283014e-06, "loss": 17.3968, "step": 22315 }, { "epoch": 0.4079185479006343, "grad_norm": 6.131460890280038, "learning_rate": 6.6997447780200484e-06, "loss": 17.4808, "step": 22316 }, { "epoch": 0.4079368270970808, "grad_norm": 6.929633668418552, "learning_rate": 6.699466392799624e-06, "loss": 17.867, "step": 22317 }, { "epoch": 0.4079551062935273, "grad_norm": 7.152533941069182, "learning_rate": 6.6991880016227185e-06, "loss": 17.9286, "step": 22318 }, { "epoch": 0.40797338548997386, "grad_norm": 5.60451819455061, "learning_rate": 6.698909604490304e-06, "loss": 17.3974, "step": 22319 }, { "epoch": 0.4079916646864204, "grad_norm": 5.387246173205216, "learning_rate": 6.698631201403362e-06, "loss": 17.1566, "step": 22320 }, { "epoch": 0.40800994388286693, "grad_norm": 7.480777182546839, "learning_rate": 6.698352792362863e-06, "loss": 17.9302, "step": 22321 }, { "epoch": 0.4080282230793134, "grad_norm": 5.409482611641295, "learning_rate": 6.698074377369786e-06, "loss": 17.2391, "step": 22322 }, { "epoch": 0.40804650227575995, "grad_norm": 6.5794473185928055, "learning_rate": 6.697795956425104e-06, "loss": 17.4969, "step": 22323 }, { "epoch": 0.4080647814722065, "grad_norm": 6.755483181362662, "learning_rate": 6.6975175295297964e-06, "loss": 17.7137, "step": 22324 }, { "epoch": 0.408083060668653, "grad_norm": 7.968078334194961, "learning_rate": 6.697239096684837e-06, "loss": 18.395, "step": 22325 }, { "epoch": 0.40810133986509955, "grad_norm": 5.974473819363313, "learning_rate": 6.696960657891199e-06, "loss": 17.447, "step": 22326 }, { "epoch": 0.40811961906154604, "grad_norm": 6.771331882821818, "learning_rate": 6.696682213149864e-06, "loss": 17.6519, "step": 22327 }, { "epoch": 0.40813789825799257, "grad_norm": 6.071063538193178, "learning_rate": 6.696403762461805e-06, "loss": 17.2803, "step": 22328 }, { "epoch": 0.4081561774544391, "grad_norm": 5.61989192197024, "learning_rate": 6.6961253058279975e-06, "loss": 17.1686, "step": 22329 }, { "epoch": 0.40817445665088564, "grad_norm": 7.3972282820148205, "learning_rate": 6.695846843249418e-06, "loss": 17.6974, "step": 22330 }, { "epoch": 0.4081927358473322, "grad_norm": 6.140066467892387, "learning_rate": 6.695568374727042e-06, "loss": 17.3713, "step": 22331 }, { "epoch": 0.40821101504377866, "grad_norm": 7.578357768826225, "learning_rate": 6.695289900261847e-06, "loss": 17.9596, "step": 22332 }, { "epoch": 0.4082292942402252, "grad_norm": 6.018563344980862, "learning_rate": 6.6950114198548065e-06, "loss": 17.2115, "step": 22333 }, { "epoch": 0.40824757343667173, "grad_norm": 5.8043348744124135, "learning_rate": 6.694732933506899e-06, "loss": 17.4287, "step": 22334 }, { "epoch": 0.40826585263311826, "grad_norm": 6.321735976651865, "learning_rate": 6.694454441219099e-06, "loss": 17.6359, "step": 22335 }, { "epoch": 0.40828413182956474, "grad_norm": 5.996166355033848, "learning_rate": 6.694175942992385e-06, "loss": 17.3165, "step": 22336 }, { "epoch": 0.4083024110260113, "grad_norm": 6.517665764543585, "learning_rate": 6.69389743882773e-06, "loss": 17.792, "step": 22337 }, { "epoch": 0.4083206902224578, "grad_norm": 6.617217406232964, "learning_rate": 6.693618928726112e-06, "loss": 17.7017, "step": 22338 }, { "epoch": 0.40833896941890435, "grad_norm": 6.2426880409963506, "learning_rate": 6.693340412688506e-06, "loss": 17.3849, "step": 22339 }, { "epoch": 0.4083572486153509, "grad_norm": 6.739043792869497, "learning_rate": 6.6930618907158885e-06, "loss": 17.6292, "step": 22340 }, { "epoch": 0.40837552781179737, "grad_norm": 6.193059522850531, "learning_rate": 6.692783362809237e-06, "loss": 17.6269, "step": 22341 }, { "epoch": 0.4083938070082439, "grad_norm": 8.52446269237601, "learning_rate": 6.692504828969526e-06, "loss": 18.5218, "step": 22342 }, { "epoch": 0.40841208620469044, "grad_norm": 8.91671047362675, "learning_rate": 6.692226289197732e-06, "loss": 18.3849, "step": 22343 }, { "epoch": 0.408430365401137, "grad_norm": 7.340285727156134, "learning_rate": 6.691947743494834e-06, "loss": 18.0694, "step": 22344 }, { "epoch": 0.4084486445975835, "grad_norm": 7.706295893171392, "learning_rate": 6.691669191861803e-06, "loss": 17.5781, "step": 22345 }, { "epoch": 0.40846692379403, "grad_norm": 5.679200024921107, "learning_rate": 6.691390634299619e-06, "loss": 17.2387, "step": 22346 }, { "epoch": 0.4084852029904765, "grad_norm": 6.6261268333057055, "learning_rate": 6.691112070809258e-06, "loss": 17.3913, "step": 22347 }, { "epoch": 0.40850348218692306, "grad_norm": 6.510059106698121, "learning_rate": 6.690833501391697e-06, "loss": 17.3602, "step": 22348 }, { "epoch": 0.4085217613833696, "grad_norm": 6.95710940300285, "learning_rate": 6.69055492604791e-06, "loss": 17.7947, "step": 22349 }, { "epoch": 0.40854004057981613, "grad_norm": 7.624081240706674, "learning_rate": 6.690276344778875e-06, "loss": 17.8705, "step": 22350 }, { "epoch": 0.4085583197762626, "grad_norm": 7.093745915585537, "learning_rate": 6.689997757585568e-06, "loss": 17.3084, "step": 22351 }, { "epoch": 0.40857659897270915, "grad_norm": 5.625715074646376, "learning_rate": 6.689719164468967e-06, "loss": 17.3643, "step": 22352 }, { "epoch": 0.4085948781691557, "grad_norm": 6.754054896581014, "learning_rate": 6.689440565430044e-06, "loss": 17.7577, "step": 22353 }, { "epoch": 0.4086131573656022, "grad_norm": 6.159552515619195, "learning_rate": 6.68916196046978e-06, "loss": 17.4922, "step": 22354 }, { "epoch": 0.40863143656204876, "grad_norm": 6.114677035172412, "learning_rate": 6.688883349589151e-06, "loss": 17.4217, "step": 22355 }, { "epoch": 0.40864971575849524, "grad_norm": 6.560570286473523, "learning_rate": 6.688604732789131e-06, "loss": 17.772, "step": 22356 }, { "epoch": 0.40866799495494177, "grad_norm": 5.437434771663608, "learning_rate": 6.6883261100706985e-06, "loss": 17.0448, "step": 22357 }, { "epoch": 0.4086862741513883, "grad_norm": 6.938271909855448, "learning_rate": 6.6880474814348285e-06, "loss": 17.8065, "step": 22358 }, { "epoch": 0.40870455334783484, "grad_norm": 7.007961642145968, "learning_rate": 6.687768846882501e-06, "loss": 17.8035, "step": 22359 }, { "epoch": 0.4087228325442814, "grad_norm": 6.780869384648125, "learning_rate": 6.687490206414689e-06, "loss": 17.6165, "step": 22360 }, { "epoch": 0.40874111174072786, "grad_norm": 6.877968688405431, "learning_rate": 6.687211560032368e-06, "loss": 17.798, "step": 22361 }, { "epoch": 0.4087593909371744, "grad_norm": 5.986241171382858, "learning_rate": 6.686932907736518e-06, "loss": 17.3656, "step": 22362 }, { "epoch": 0.40877767013362093, "grad_norm": 6.660632585050558, "learning_rate": 6.686654249528116e-06, "loss": 17.5422, "step": 22363 }, { "epoch": 0.40879594933006747, "grad_norm": 6.241903161658502, "learning_rate": 6.686375585408137e-06, "loss": 17.5906, "step": 22364 }, { "epoch": 0.408814228526514, "grad_norm": 5.598750220896844, "learning_rate": 6.686096915377557e-06, "loss": 17.164, "step": 22365 }, { "epoch": 0.4088325077229605, "grad_norm": 6.348137366423458, "learning_rate": 6.685818239437355e-06, "loss": 17.6996, "step": 22366 }, { "epoch": 0.408850786919407, "grad_norm": 6.0425301995955, "learning_rate": 6.685539557588504e-06, "loss": 17.452, "step": 22367 }, { "epoch": 0.40886906611585355, "grad_norm": 6.012375619513796, "learning_rate": 6.685260869831984e-06, "loss": 17.445, "step": 22368 }, { "epoch": 0.4088873453123001, "grad_norm": 6.472431702978844, "learning_rate": 6.684982176168773e-06, "loss": 17.4102, "step": 22369 }, { "epoch": 0.40890562450874657, "grad_norm": 6.952900079004711, "learning_rate": 6.684703476599844e-06, "loss": 17.6931, "step": 22370 }, { "epoch": 0.4089239037051931, "grad_norm": 6.195808589104972, "learning_rate": 6.684424771126176e-06, "loss": 17.4166, "step": 22371 }, { "epoch": 0.40894218290163964, "grad_norm": 6.543064768324731, "learning_rate": 6.684146059748743e-06, "loss": 17.9098, "step": 22372 }, { "epoch": 0.4089604620980862, "grad_norm": 4.7435375951410075, "learning_rate": 6.683867342468528e-06, "loss": 16.8005, "step": 22373 }, { "epoch": 0.4089787412945327, "grad_norm": 5.4370720825517616, "learning_rate": 6.683588619286501e-06, "loss": 17.1992, "step": 22374 }, { "epoch": 0.4089970204909792, "grad_norm": 6.171659769049901, "learning_rate": 6.683309890203643e-06, "loss": 17.4659, "step": 22375 }, { "epoch": 0.4090152996874257, "grad_norm": 5.8467812409448845, "learning_rate": 6.683031155220931e-06, "loss": 17.288, "step": 22376 }, { "epoch": 0.40903357888387226, "grad_norm": 7.396621978798048, "learning_rate": 6.682752414339339e-06, "loss": 18.0265, "step": 22377 }, { "epoch": 0.4090518580803188, "grad_norm": 6.049778369575566, "learning_rate": 6.682473667559847e-06, "loss": 17.7492, "step": 22378 }, { "epoch": 0.40907013727676533, "grad_norm": 6.371293358168965, "learning_rate": 6.682194914883431e-06, "loss": 17.5876, "step": 22379 }, { "epoch": 0.4090884164732118, "grad_norm": 6.525303404567865, "learning_rate": 6.681916156311068e-06, "loss": 17.7739, "step": 22380 }, { "epoch": 0.40910669566965835, "grad_norm": 7.041370823694498, "learning_rate": 6.681637391843732e-06, "loss": 17.8203, "step": 22381 }, { "epoch": 0.4091249748661049, "grad_norm": 6.3008761005745155, "learning_rate": 6.681358621482405e-06, "loss": 17.5252, "step": 22382 }, { "epoch": 0.4091432540625514, "grad_norm": 7.765356271583535, "learning_rate": 6.6810798452280635e-06, "loss": 18.2771, "step": 22383 }, { "epoch": 0.40916153325899796, "grad_norm": 6.365913360250412, "learning_rate": 6.680801063081681e-06, "loss": 17.4948, "step": 22384 }, { "epoch": 0.40917981245544444, "grad_norm": 6.318444715530758, "learning_rate": 6.6805222750442366e-06, "loss": 17.4903, "step": 22385 }, { "epoch": 0.40919809165189097, "grad_norm": 6.2602657409691345, "learning_rate": 6.680243481116708e-06, "loss": 17.344, "step": 22386 }, { "epoch": 0.4092163708483375, "grad_norm": 7.076340057322319, "learning_rate": 6.679964681300073e-06, "loss": 17.7203, "step": 22387 }, { "epoch": 0.40923465004478404, "grad_norm": 7.4483993347309765, "learning_rate": 6.679685875595305e-06, "loss": 18.0464, "step": 22388 }, { "epoch": 0.4092529292412306, "grad_norm": 8.177442978394508, "learning_rate": 6.679407064003386e-06, "loss": 18.2687, "step": 22389 }, { "epoch": 0.40927120843767706, "grad_norm": 5.67031421466407, "learning_rate": 6.6791282465252895e-06, "loss": 17.2061, "step": 22390 }, { "epoch": 0.4092894876341236, "grad_norm": 6.073479551777875, "learning_rate": 6.678849423161995e-06, "loss": 17.29, "step": 22391 }, { "epoch": 0.40930776683057013, "grad_norm": 5.908413856599679, "learning_rate": 6.678570593914478e-06, "loss": 17.412, "step": 22392 }, { "epoch": 0.40932604602701667, "grad_norm": 6.850784777996676, "learning_rate": 6.678291758783719e-06, "loss": 17.8967, "step": 22393 }, { "epoch": 0.4093443252234632, "grad_norm": 7.8328601566665, "learning_rate": 6.6780129177706895e-06, "loss": 18.6999, "step": 22394 }, { "epoch": 0.4093626044199097, "grad_norm": 6.272933520527842, "learning_rate": 6.677734070876373e-06, "loss": 17.6513, "step": 22395 }, { "epoch": 0.4093808836163562, "grad_norm": 6.514965334561195, "learning_rate": 6.677455218101743e-06, "loss": 17.4764, "step": 22396 }, { "epoch": 0.40939916281280275, "grad_norm": 6.005791253197453, "learning_rate": 6.67717635944778e-06, "loss": 17.0802, "step": 22397 }, { "epoch": 0.4094174420092493, "grad_norm": 6.7383995685433895, "learning_rate": 6.676897494915457e-06, "loss": 17.8608, "step": 22398 }, { "epoch": 0.4094357212056958, "grad_norm": 8.845025797527725, "learning_rate": 6.6766186245057544e-06, "loss": 17.5709, "step": 22399 }, { "epoch": 0.4094540004021423, "grad_norm": 6.539898475467322, "learning_rate": 6.67633974821965e-06, "loss": 17.4487, "step": 22400 }, { "epoch": 0.40947227959858884, "grad_norm": 6.476897413372039, "learning_rate": 6.67606086605812e-06, "loss": 17.3881, "step": 22401 }, { "epoch": 0.4094905587950354, "grad_norm": 6.139891906179132, "learning_rate": 6.675781978022141e-06, "loss": 17.5398, "step": 22402 }, { "epoch": 0.4095088379914819, "grad_norm": 6.82874764899981, "learning_rate": 6.675503084112692e-06, "loss": 17.5812, "step": 22403 }, { "epoch": 0.4095271171879284, "grad_norm": 5.798182208057855, "learning_rate": 6.675224184330751e-06, "loss": 17.6063, "step": 22404 }, { "epoch": 0.4095453963843749, "grad_norm": 6.5577458900205485, "learning_rate": 6.674945278677294e-06, "loss": 17.4989, "step": 22405 }, { "epoch": 0.40956367558082146, "grad_norm": 6.039979097365193, "learning_rate": 6.674666367153299e-06, "loss": 17.5457, "step": 22406 }, { "epoch": 0.409581954777268, "grad_norm": 7.492943735372451, "learning_rate": 6.674387449759744e-06, "loss": 18.4872, "step": 22407 }, { "epoch": 0.40960023397371453, "grad_norm": 5.967778949376407, "learning_rate": 6.674108526497605e-06, "loss": 17.2267, "step": 22408 }, { "epoch": 0.409618513170161, "grad_norm": 6.551757154325815, "learning_rate": 6.673829597367862e-06, "loss": 17.5845, "step": 22409 }, { "epoch": 0.40963679236660755, "grad_norm": 6.43448251059287, "learning_rate": 6.673550662371491e-06, "loss": 17.5492, "step": 22410 }, { "epoch": 0.4096550715630541, "grad_norm": 5.390735848441046, "learning_rate": 6.673271721509471e-06, "loss": 17.1281, "step": 22411 }, { "epoch": 0.4096733507595006, "grad_norm": 5.562757320291179, "learning_rate": 6.672992774782779e-06, "loss": 17.3111, "step": 22412 }, { "epoch": 0.40969162995594716, "grad_norm": 7.848481641711909, "learning_rate": 6.672713822192392e-06, "loss": 17.7411, "step": 22413 }, { "epoch": 0.40970990915239364, "grad_norm": 5.705962222544973, "learning_rate": 6.672434863739288e-06, "loss": 17.0998, "step": 22414 }, { "epoch": 0.40972818834884017, "grad_norm": 8.514607143462618, "learning_rate": 6.672155899424445e-06, "loss": 18.5722, "step": 22415 }, { "epoch": 0.4097464675452867, "grad_norm": 5.8988134022637535, "learning_rate": 6.6718769292488406e-06, "loss": 17.197, "step": 22416 }, { "epoch": 0.40976474674173324, "grad_norm": 6.690551317855109, "learning_rate": 6.6715979532134535e-06, "loss": 17.8692, "step": 22417 }, { "epoch": 0.4097830259381798, "grad_norm": 7.3224953502855, "learning_rate": 6.67131897131926e-06, "loss": 18.1417, "step": 22418 }, { "epoch": 0.40980130513462626, "grad_norm": 7.578450397318651, "learning_rate": 6.671039983567238e-06, "loss": 17.9822, "step": 22419 }, { "epoch": 0.4098195843310728, "grad_norm": 7.047372506277028, "learning_rate": 6.670760989958366e-06, "loss": 17.5138, "step": 22420 }, { "epoch": 0.40983786352751933, "grad_norm": 6.500958699879609, "learning_rate": 6.670481990493621e-06, "loss": 17.5217, "step": 22421 }, { "epoch": 0.40985614272396587, "grad_norm": 6.591173975816295, "learning_rate": 6.670202985173983e-06, "loss": 17.4485, "step": 22422 }, { "epoch": 0.4098744219204124, "grad_norm": 6.497177345976398, "learning_rate": 6.669923974000429e-06, "loss": 17.5386, "step": 22423 }, { "epoch": 0.4098927011168589, "grad_norm": 5.355490015264795, "learning_rate": 6.669644956973935e-06, "loss": 17.0498, "step": 22424 }, { "epoch": 0.4099109803133054, "grad_norm": 6.149706652710731, "learning_rate": 6.6693659340954804e-06, "loss": 17.5281, "step": 22425 }, { "epoch": 0.40992925950975195, "grad_norm": 5.630134258395935, "learning_rate": 6.669086905366043e-06, "loss": 17.379, "step": 22426 }, { "epoch": 0.4099475387061985, "grad_norm": 6.22113460738467, "learning_rate": 6.668807870786601e-06, "loss": 17.5218, "step": 22427 }, { "epoch": 0.409965817902645, "grad_norm": 6.225699203355149, "learning_rate": 6.668528830358134e-06, "loss": 17.3983, "step": 22428 }, { "epoch": 0.4099840970990915, "grad_norm": 6.5146956328387775, "learning_rate": 6.668249784081616e-06, "loss": 17.6844, "step": 22429 }, { "epoch": 0.41000237629553804, "grad_norm": 7.210863668792879, "learning_rate": 6.667970731958029e-06, "loss": 17.667, "step": 22430 }, { "epoch": 0.4100206554919846, "grad_norm": 7.243398059648909, "learning_rate": 6.667691673988348e-06, "loss": 17.4272, "step": 22431 }, { "epoch": 0.4100389346884311, "grad_norm": 5.536042701374809, "learning_rate": 6.667412610173552e-06, "loss": 17.3022, "step": 22432 }, { "epoch": 0.41005721388487765, "grad_norm": 6.944375066789045, "learning_rate": 6.667133540514621e-06, "loss": 17.4673, "step": 22433 }, { "epoch": 0.4100754930813241, "grad_norm": 6.800971125614902, "learning_rate": 6.66685446501253e-06, "loss": 17.7474, "step": 22434 }, { "epoch": 0.41009377227777066, "grad_norm": 5.855007406131806, "learning_rate": 6.666575383668259e-06, "loss": 17.2631, "step": 22435 }, { "epoch": 0.4101120514742172, "grad_norm": 7.458053534102518, "learning_rate": 6.666296296482787e-06, "loss": 17.7256, "step": 22436 }, { "epoch": 0.41013033067066373, "grad_norm": 6.04020474663933, "learning_rate": 6.66601720345709e-06, "loss": 17.214, "step": 22437 }, { "epoch": 0.4101486098671102, "grad_norm": 7.315070036328204, "learning_rate": 6.665738104592149e-06, "loss": 17.8686, "step": 22438 }, { "epoch": 0.41016688906355675, "grad_norm": 6.991834659225314, "learning_rate": 6.66545899988894e-06, "loss": 17.6729, "step": 22439 }, { "epoch": 0.4101851682600033, "grad_norm": 6.1356000894297935, "learning_rate": 6.665179889348438e-06, "loss": 17.457, "step": 22440 }, { "epoch": 0.4102034474564498, "grad_norm": 7.530821124138062, "learning_rate": 6.66490077297163e-06, "loss": 18.1345, "step": 22441 }, { "epoch": 0.41022172665289636, "grad_norm": 7.91404525806891, "learning_rate": 6.664621650759487e-06, "loss": 18.0275, "step": 22442 }, { "epoch": 0.41024000584934284, "grad_norm": 6.957079326907108, "learning_rate": 6.66434252271299e-06, "loss": 17.6925, "step": 22443 }, { "epoch": 0.4102582850457894, "grad_norm": 6.195271870605417, "learning_rate": 6.664063388833116e-06, "loss": 17.381, "step": 22444 }, { "epoch": 0.4102765642422359, "grad_norm": 6.692512343516527, "learning_rate": 6.663784249120846e-06, "loss": 17.422, "step": 22445 }, { "epoch": 0.41029484343868244, "grad_norm": 4.662301471960729, "learning_rate": 6.663505103577155e-06, "loss": 16.8443, "step": 22446 }, { "epoch": 0.410313122635129, "grad_norm": 6.161177716814195, "learning_rate": 6.663225952203023e-06, "loss": 17.5143, "step": 22447 }, { "epoch": 0.41033140183157546, "grad_norm": 7.114737601813586, "learning_rate": 6.66294679499943e-06, "loss": 17.5671, "step": 22448 }, { "epoch": 0.410349681028022, "grad_norm": 6.490208520600908, "learning_rate": 6.662667631967351e-06, "loss": 17.5853, "step": 22449 }, { "epoch": 0.41036796022446853, "grad_norm": 6.788196061136019, "learning_rate": 6.6623884631077664e-06, "loss": 17.6763, "step": 22450 }, { "epoch": 0.41038623942091507, "grad_norm": 7.399119123919406, "learning_rate": 6.6621092884216555e-06, "loss": 18.0581, "step": 22451 }, { "epoch": 0.4104045186173616, "grad_norm": 5.616390064467137, "learning_rate": 6.661830107909996e-06, "loss": 17.32, "step": 22452 }, { "epoch": 0.4104227978138081, "grad_norm": 8.262323097739829, "learning_rate": 6.661550921573764e-06, "loss": 18.1139, "step": 22453 }, { "epoch": 0.4104410770102546, "grad_norm": 6.9899293270499285, "learning_rate": 6.661271729413942e-06, "loss": 17.7583, "step": 22454 }, { "epoch": 0.41045935620670115, "grad_norm": 7.651297782205858, "learning_rate": 6.660992531431507e-06, "loss": 18.151, "step": 22455 }, { "epoch": 0.4104776354031477, "grad_norm": 6.046370442855944, "learning_rate": 6.660713327627437e-06, "loss": 17.24, "step": 22456 }, { "epoch": 0.4104959145995942, "grad_norm": 6.8742298685763, "learning_rate": 6.66043411800271e-06, "loss": 17.7414, "step": 22457 }, { "epoch": 0.4105141937960407, "grad_norm": 8.311854084271925, "learning_rate": 6.660154902558304e-06, "loss": 18.2518, "step": 22458 }, { "epoch": 0.41053247299248724, "grad_norm": 7.01890204473356, "learning_rate": 6.6598756812952026e-06, "loss": 17.697, "step": 22459 }, { "epoch": 0.4105507521889338, "grad_norm": 6.646090362366263, "learning_rate": 6.65959645421438e-06, "loss": 17.7471, "step": 22460 }, { "epoch": 0.4105690313853803, "grad_norm": 5.416482398601893, "learning_rate": 6.659317221316815e-06, "loss": 17.3344, "step": 22461 }, { "epoch": 0.41058731058182685, "grad_norm": 5.019278069723802, "learning_rate": 6.659037982603488e-06, "loss": 16.9551, "step": 22462 }, { "epoch": 0.4106055897782733, "grad_norm": 6.676420183258173, "learning_rate": 6.658758738075376e-06, "loss": 17.7794, "step": 22463 }, { "epoch": 0.41062386897471986, "grad_norm": 6.048997414497438, "learning_rate": 6.658479487733459e-06, "loss": 17.308, "step": 22464 }, { "epoch": 0.4106421481711664, "grad_norm": 7.98950980278614, "learning_rate": 6.6582002315787155e-06, "loss": 17.9655, "step": 22465 }, { "epoch": 0.41066042736761293, "grad_norm": 5.991122565262289, "learning_rate": 6.657920969612124e-06, "loss": 17.2028, "step": 22466 }, { "epoch": 0.41067870656405947, "grad_norm": 7.535391185851954, "learning_rate": 6.657641701834663e-06, "loss": 18.0067, "step": 22467 }, { "epoch": 0.41069698576050595, "grad_norm": 7.395234063393955, "learning_rate": 6.657362428247311e-06, "loss": 17.7825, "step": 22468 }, { "epoch": 0.4107152649569525, "grad_norm": 6.148243444602376, "learning_rate": 6.65708314885105e-06, "loss": 17.6456, "step": 22469 }, { "epoch": 0.410733544153399, "grad_norm": 6.940659798109537, "learning_rate": 6.656803863646855e-06, "loss": 17.3144, "step": 22470 }, { "epoch": 0.41075182334984556, "grad_norm": 5.984508152192915, "learning_rate": 6.656524572635705e-06, "loss": 17.3747, "step": 22471 }, { "epoch": 0.41077010254629204, "grad_norm": 8.984030284838962, "learning_rate": 6.65624527581858e-06, "loss": 18.2754, "step": 22472 }, { "epoch": 0.4107883817427386, "grad_norm": 6.874324901779238, "learning_rate": 6.655965973196461e-06, "loss": 17.8326, "step": 22473 }, { "epoch": 0.4108066609391851, "grad_norm": 6.649152355038819, "learning_rate": 6.655686664770324e-06, "loss": 17.6961, "step": 22474 }, { "epoch": 0.41082494013563164, "grad_norm": 8.136475477998427, "learning_rate": 6.6554073505411495e-06, "loss": 17.9943, "step": 22475 }, { "epoch": 0.4108432193320782, "grad_norm": 6.43012584151685, "learning_rate": 6.655128030509915e-06, "loss": 17.5833, "step": 22476 }, { "epoch": 0.41086149852852466, "grad_norm": 7.342883675959253, "learning_rate": 6.654848704677601e-06, "loss": 17.7023, "step": 22477 }, { "epoch": 0.4108797777249712, "grad_norm": 8.305732709692458, "learning_rate": 6.654569373045185e-06, "loss": 18.3501, "step": 22478 }, { "epoch": 0.41089805692141773, "grad_norm": 5.804968513432319, "learning_rate": 6.654290035613649e-06, "loss": 17.3626, "step": 22479 }, { "epoch": 0.41091633611786427, "grad_norm": 6.065199581638211, "learning_rate": 6.654010692383967e-06, "loss": 17.1871, "step": 22480 }, { "epoch": 0.4109346153143108, "grad_norm": 5.798473532906206, "learning_rate": 6.653731343357123e-06, "loss": 17.3126, "step": 22481 }, { "epoch": 0.4109528945107573, "grad_norm": 6.415907022619494, "learning_rate": 6.653451988534094e-06, "loss": 17.5288, "step": 22482 }, { "epoch": 0.4109711737072038, "grad_norm": 7.033875247971466, "learning_rate": 6.6531726279158595e-06, "loss": 17.7119, "step": 22483 }, { "epoch": 0.41098945290365035, "grad_norm": 8.652061811686448, "learning_rate": 6.652893261503398e-06, "loss": 18.2471, "step": 22484 }, { "epoch": 0.4110077321000969, "grad_norm": 6.00480941005815, "learning_rate": 6.6526138892976875e-06, "loss": 17.4825, "step": 22485 }, { "epoch": 0.4110260112965434, "grad_norm": 6.899236062989019, "learning_rate": 6.652334511299712e-06, "loss": 17.4574, "step": 22486 }, { "epoch": 0.4110442904929899, "grad_norm": 6.592482875960514, "learning_rate": 6.652055127510445e-06, "loss": 17.6955, "step": 22487 }, { "epoch": 0.41106256968943644, "grad_norm": 5.799762215987938, "learning_rate": 6.651775737930869e-06, "loss": 17.2804, "step": 22488 }, { "epoch": 0.411080848885883, "grad_norm": 6.8970588756919256, "learning_rate": 6.651496342561962e-06, "loss": 17.3843, "step": 22489 }, { "epoch": 0.4110991280823295, "grad_norm": 6.318700315418928, "learning_rate": 6.651216941404703e-06, "loss": 17.7247, "step": 22490 }, { "epoch": 0.41111740727877605, "grad_norm": 6.513451357168261, "learning_rate": 6.650937534460074e-06, "loss": 17.484, "step": 22491 }, { "epoch": 0.41113568647522253, "grad_norm": 6.453364609413719, "learning_rate": 6.65065812172905e-06, "loss": 17.474, "step": 22492 }, { "epoch": 0.41115396567166906, "grad_norm": 5.901676580688798, "learning_rate": 6.650378703212614e-06, "loss": 17.3427, "step": 22493 }, { "epoch": 0.4111722448681156, "grad_norm": 6.035202696120195, "learning_rate": 6.650099278911742e-06, "loss": 17.5816, "step": 22494 }, { "epoch": 0.41119052406456214, "grad_norm": 6.328909776131016, "learning_rate": 6.649819848827417e-06, "loss": 17.407, "step": 22495 }, { "epoch": 0.41120880326100867, "grad_norm": 5.54156689266959, "learning_rate": 6.649540412960616e-06, "loss": 17.1902, "step": 22496 }, { "epoch": 0.41122708245745515, "grad_norm": 6.867040320968979, "learning_rate": 6.649260971312319e-06, "loss": 17.8289, "step": 22497 }, { "epoch": 0.4112453616539017, "grad_norm": 5.781974445936039, "learning_rate": 6.648981523883506e-06, "loss": 17.2794, "step": 22498 }, { "epoch": 0.4112636408503482, "grad_norm": 6.866986400203878, "learning_rate": 6.6487020706751535e-06, "loss": 17.5703, "step": 22499 }, { "epoch": 0.41128192004679476, "grad_norm": 6.25314561124634, "learning_rate": 6.648422611688247e-06, "loss": 17.3703, "step": 22500 }, { "epoch": 0.4113001992432413, "grad_norm": 5.527027959992802, "learning_rate": 6.64814314692376e-06, "loss": 17.0699, "step": 22501 }, { "epoch": 0.4113184784396878, "grad_norm": 9.991418748524842, "learning_rate": 6.6478636763826745e-06, "loss": 17.6793, "step": 22502 }, { "epoch": 0.4113367576361343, "grad_norm": 6.23830929250894, "learning_rate": 6.647584200065971e-06, "loss": 17.2719, "step": 22503 }, { "epoch": 0.41135503683258084, "grad_norm": 7.572286135394841, "learning_rate": 6.647304717974626e-06, "loss": 18.0421, "step": 22504 }, { "epoch": 0.4113733160290274, "grad_norm": 5.261262791016685, "learning_rate": 6.647025230109622e-06, "loss": 17.0682, "step": 22505 }, { "epoch": 0.41139159522547386, "grad_norm": 6.678915311011433, "learning_rate": 6.646745736471936e-06, "loss": 17.4591, "step": 22506 }, { "epoch": 0.4114098744219204, "grad_norm": 5.753913399849667, "learning_rate": 6.646466237062551e-06, "loss": 17.1814, "step": 22507 }, { "epoch": 0.41142815361836693, "grad_norm": 9.14217943399806, "learning_rate": 6.646186731882444e-06, "loss": 18.6355, "step": 22508 }, { "epoch": 0.41144643281481347, "grad_norm": 6.604336182941888, "learning_rate": 6.645907220932595e-06, "loss": 17.7191, "step": 22509 }, { "epoch": 0.41146471201126, "grad_norm": 5.813419040298178, "learning_rate": 6.645627704213985e-06, "loss": 17.2996, "step": 22510 }, { "epoch": 0.4114829912077065, "grad_norm": 6.552131724035272, "learning_rate": 6.645348181727594e-06, "loss": 17.4184, "step": 22511 }, { "epoch": 0.411501270404153, "grad_norm": 6.598285296345405, "learning_rate": 6.645068653474396e-06, "loss": 17.5236, "step": 22512 }, { "epoch": 0.41151954960059955, "grad_norm": 5.676660695960994, "learning_rate": 6.644789119455377e-06, "loss": 16.8865, "step": 22513 }, { "epoch": 0.4115378287970461, "grad_norm": 5.061233180440918, "learning_rate": 6.644509579671517e-06, "loss": 16.8625, "step": 22514 }, { "epoch": 0.4115561079934926, "grad_norm": 6.514822594679821, "learning_rate": 6.644230034123792e-06, "loss": 17.4267, "step": 22515 }, { "epoch": 0.4115743871899391, "grad_norm": 4.932665565234066, "learning_rate": 6.643950482813184e-06, "loss": 17.12, "step": 22516 }, { "epoch": 0.41159266638638564, "grad_norm": 6.643103736082339, "learning_rate": 6.643670925740672e-06, "loss": 17.9247, "step": 22517 }, { "epoch": 0.4116109455828322, "grad_norm": 6.605073545749215, "learning_rate": 6.643391362907235e-06, "loss": 17.5981, "step": 22518 }, { "epoch": 0.4116292247792787, "grad_norm": 7.392426403764373, "learning_rate": 6.643111794313855e-06, "loss": 17.9218, "step": 22519 }, { "epoch": 0.41164750397572525, "grad_norm": 6.84728674931774, "learning_rate": 6.6428322199615106e-06, "loss": 17.656, "step": 22520 }, { "epoch": 0.41166578317217173, "grad_norm": 5.647548514640339, "learning_rate": 6.64255263985118e-06, "loss": 17.2701, "step": 22521 }, { "epoch": 0.41168406236861826, "grad_norm": 5.552168435205575, "learning_rate": 6.642273053983848e-06, "loss": 17.0794, "step": 22522 }, { "epoch": 0.4117023415650648, "grad_norm": 6.78696796718912, "learning_rate": 6.64199346236049e-06, "loss": 17.6392, "step": 22523 }, { "epoch": 0.41172062076151134, "grad_norm": 7.092229779419543, "learning_rate": 6.641713864982088e-06, "loss": 17.7869, "step": 22524 }, { "epoch": 0.41173889995795787, "grad_norm": 5.618666480432428, "learning_rate": 6.641434261849621e-06, "loss": 17.4101, "step": 22525 }, { "epoch": 0.41175717915440435, "grad_norm": 6.476638868176701, "learning_rate": 6.641154652964068e-06, "loss": 17.6316, "step": 22526 }, { "epoch": 0.4117754583508509, "grad_norm": 7.067195271851774, "learning_rate": 6.640875038326411e-06, "loss": 18.0322, "step": 22527 }, { "epoch": 0.4117937375472974, "grad_norm": 6.82609681223036, "learning_rate": 6.640595417937631e-06, "loss": 17.7255, "step": 22528 }, { "epoch": 0.41181201674374396, "grad_norm": 7.708298233999794, "learning_rate": 6.640315791798705e-06, "loss": 17.8175, "step": 22529 }, { "epoch": 0.4118302959401905, "grad_norm": 5.429983974169377, "learning_rate": 6.640036159910614e-06, "loss": 17.2022, "step": 22530 }, { "epoch": 0.411848575136637, "grad_norm": 6.477489049801571, "learning_rate": 6.639756522274341e-06, "loss": 17.4828, "step": 22531 }, { "epoch": 0.4118668543330835, "grad_norm": 7.084888439645354, "learning_rate": 6.639476878890862e-06, "loss": 17.7816, "step": 22532 }, { "epoch": 0.41188513352953005, "grad_norm": 6.6279879840863645, "learning_rate": 6.639197229761158e-06, "loss": 17.7472, "step": 22533 }, { "epoch": 0.4119034127259766, "grad_norm": 6.007976655677513, "learning_rate": 6.638917574886211e-06, "loss": 17.5309, "step": 22534 }, { "epoch": 0.4119216919224231, "grad_norm": 6.343015233689347, "learning_rate": 6.6386379142669996e-06, "loss": 17.784, "step": 22535 }, { "epoch": 0.4119399711188696, "grad_norm": 6.820796339191304, "learning_rate": 6.638358247904505e-06, "loss": 17.9703, "step": 22536 }, { "epoch": 0.41195825031531613, "grad_norm": 5.85890992128528, "learning_rate": 6.638078575799707e-06, "loss": 17.3764, "step": 22537 }, { "epoch": 0.41197652951176267, "grad_norm": 7.074606037217186, "learning_rate": 6.637798897953585e-06, "loss": 17.8454, "step": 22538 }, { "epoch": 0.4119948087082092, "grad_norm": 5.784132235908096, "learning_rate": 6.637519214367121e-06, "loss": 17.1954, "step": 22539 }, { "epoch": 0.4120130879046557, "grad_norm": 6.025932167870897, "learning_rate": 6.637239525041293e-06, "loss": 17.3466, "step": 22540 }, { "epoch": 0.4120313671011022, "grad_norm": 6.169035862579544, "learning_rate": 6.636959829977083e-06, "loss": 17.4327, "step": 22541 }, { "epoch": 0.41204964629754876, "grad_norm": 6.650060216978216, "learning_rate": 6.636680129175472e-06, "loss": 17.3783, "step": 22542 }, { "epoch": 0.4120679254939953, "grad_norm": 5.154164921642721, "learning_rate": 6.636400422637439e-06, "loss": 16.9665, "step": 22543 }, { "epoch": 0.4120862046904418, "grad_norm": 7.4621241910548815, "learning_rate": 6.636120710363964e-06, "loss": 18.1499, "step": 22544 }, { "epoch": 0.4121044838868883, "grad_norm": 7.432794839233043, "learning_rate": 6.635840992356026e-06, "loss": 17.9092, "step": 22545 }, { "epoch": 0.41212276308333484, "grad_norm": 7.170814548989489, "learning_rate": 6.63556126861461e-06, "loss": 17.5366, "step": 22546 }, { "epoch": 0.4121410422797814, "grad_norm": 5.518909409711679, "learning_rate": 6.635281539140692e-06, "loss": 17.0978, "step": 22547 }, { "epoch": 0.4121593214762279, "grad_norm": 6.302062730524471, "learning_rate": 6.635001803935255e-06, "loss": 17.6605, "step": 22548 }, { "epoch": 0.41217760067267445, "grad_norm": 7.471207997707816, "learning_rate": 6.634722062999278e-06, "loss": 17.9399, "step": 22549 }, { "epoch": 0.41219587986912093, "grad_norm": 5.324470003932659, "learning_rate": 6.634442316333742e-06, "loss": 17.2086, "step": 22550 }, { "epoch": 0.41221415906556746, "grad_norm": 5.9771071974674905, "learning_rate": 6.634162563939628e-06, "loss": 17.3712, "step": 22551 }, { "epoch": 0.412232438262014, "grad_norm": 6.7774724499917225, "learning_rate": 6.633882805817917e-06, "loss": 17.7359, "step": 22552 }, { "epoch": 0.41225071745846054, "grad_norm": 7.551931887991059, "learning_rate": 6.6336030419695866e-06, "loss": 17.8439, "step": 22553 }, { "epoch": 0.41226899665490707, "grad_norm": 5.753257938800097, "learning_rate": 6.63332327239562e-06, "loss": 17.4086, "step": 22554 }, { "epoch": 0.41228727585135355, "grad_norm": 5.866038870768499, "learning_rate": 6.633043497096998e-06, "loss": 17.457, "step": 22555 }, { "epoch": 0.4123055550478001, "grad_norm": 6.0536502638895, "learning_rate": 6.632763716074699e-06, "loss": 17.304, "step": 22556 }, { "epoch": 0.4123238342442466, "grad_norm": 7.563518144366697, "learning_rate": 6.632483929329705e-06, "loss": 17.8347, "step": 22557 }, { "epoch": 0.41234211344069316, "grad_norm": 8.167575832029081, "learning_rate": 6.6322041368629965e-06, "loss": 18.2212, "step": 22558 }, { "epoch": 0.4123603926371397, "grad_norm": 6.886400060823418, "learning_rate": 6.631924338675555e-06, "loss": 18.0066, "step": 22559 }, { "epoch": 0.4123786718335862, "grad_norm": 5.423171002706399, "learning_rate": 6.63164453476836e-06, "loss": 17.2012, "step": 22560 }, { "epoch": 0.4123969510300327, "grad_norm": 6.1445295548696315, "learning_rate": 6.631364725142392e-06, "loss": 17.5894, "step": 22561 }, { "epoch": 0.41241523022647925, "grad_norm": 7.212889865114428, "learning_rate": 6.631084909798632e-06, "loss": 17.8984, "step": 22562 }, { "epoch": 0.4124335094229258, "grad_norm": 9.16653149833562, "learning_rate": 6.630805088738061e-06, "loss": 17.6354, "step": 22563 }, { "epoch": 0.4124517886193723, "grad_norm": 6.325290511361483, "learning_rate": 6.63052526196166e-06, "loss": 17.5876, "step": 22564 }, { "epoch": 0.4124700678158188, "grad_norm": 5.763074934803377, "learning_rate": 6.63024542947041e-06, "loss": 17.3363, "step": 22565 }, { "epoch": 0.41248834701226533, "grad_norm": 5.958746031648419, "learning_rate": 6.629965591265292e-06, "loss": 17.223, "step": 22566 }, { "epoch": 0.41250662620871187, "grad_norm": 6.515658349181106, "learning_rate": 6.629685747347283e-06, "loss": 17.6717, "step": 22567 }, { "epoch": 0.4125249054051584, "grad_norm": 4.946023024333121, "learning_rate": 6.629405897717368e-06, "loss": 16.9446, "step": 22568 }, { "epoch": 0.41254318460160494, "grad_norm": 5.283187874761963, "learning_rate": 6.629126042376528e-06, "loss": 17.1899, "step": 22569 }, { "epoch": 0.4125614637980514, "grad_norm": 7.248605819013886, "learning_rate": 6.628846181325742e-06, "loss": 17.7481, "step": 22570 }, { "epoch": 0.41257974299449796, "grad_norm": 7.405447399579446, "learning_rate": 6.628566314565992e-06, "loss": 17.8948, "step": 22571 }, { "epoch": 0.4125980221909445, "grad_norm": 6.6967074034598175, "learning_rate": 6.628286442098256e-06, "loss": 17.7341, "step": 22572 }, { "epoch": 0.412616301387391, "grad_norm": 5.734497313651601, "learning_rate": 6.62800656392352e-06, "loss": 17.2537, "step": 22573 }, { "epoch": 0.4126345805838375, "grad_norm": 7.758596269655583, "learning_rate": 6.627726680042762e-06, "loss": 17.9411, "step": 22574 }, { "epoch": 0.41265285978028404, "grad_norm": 5.201702593488365, "learning_rate": 6.6274467904569615e-06, "loss": 16.9402, "step": 22575 }, { "epoch": 0.4126711389767306, "grad_norm": 5.9675413553645065, "learning_rate": 6.627166895167103e-06, "loss": 17.3452, "step": 22576 }, { "epoch": 0.4126894181731771, "grad_norm": 7.291423129238422, "learning_rate": 6.626886994174165e-06, "loss": 17.779, "step": 22577 }, { "epoch": 0.41270769736962365, "grad_norm": 5.968892468616462, "learning_rate": 6.626607087479129e-06, "loss": 17.5132, "step": 22578 }, { "epoch": 0.41272597656607013, "grad_norm": 7.170439939026732, "learning_rate": 6.6263271750829775e-06, "loss": 17.7694, "step": 22579 }, { "epoch": 0.41274425576251667, "grad_norm": 7.318670095737872, "learning_rate": 6.626047256986688e-06, "loss": 17.9435, "step": 22580 }, { "epoch": 0.4127625349589632, "grad_norm": 6.036756291847952, "learning_rate": 6.625767333191247e-06, "loss": 17.398, "step": 22581 }, { "epoch": 0.41278081415540974, "grad_norm": 6.053125609980945, "learning_rate": 6.62548740369763e-06, "loss": 17.3572, "step": 22582 }, { "epoch": 0.41279909335185627, "grad_norm": 6.199717892438728, "learning_rate": 6.625207468506822e-06, "loss": 17.5492, "step": 22583 }, { "epoch": 0.41281737254830275, "grad_norm": 6.698844190195537, "learning_rate": 6.624927527619803e-06, "loss": 17.5677, "step": 22584 }, { "epoch": 0.4128356517447493, "grad_norm": 5.988876906425944, "learning_rate": 6.624647581037553e-06, "loss": 17.344, "step": 22585 }, { "epoch": 0.4128539309411958, "grad_norm": 6.045429597912229, "learning_rate": 6.624367628761056e-06, "loss": 17.4643, "step": 22586 }, { "epoch": 0.41287221013764236, "grad_norm": 6.42182730756578, "learning_rate": 6.624087670791291e-06, "loss": 17.6234, "step": 22587 }, { "epoch": 0.4128904893340889, "grad_norm": 6.986066200367678, "learning_rate": 6.623807707129237e-06, "loss": 17.4393, "step": 22588 }, { "epoch": 0.4129087685305354, "grad_norm": 5.657003922026771, "learning_rate": 6.623527737775881e-06, "loss": 17.2285, "step": 22589 }, { "epoch": 0.4129270477269819, "grad_norm": 6.25354650988634, "learning_rate": 6.623247762732199e-06, "loss": 17.6109, "step": 22590 }, { "epoch": 0.41294532692342845, "grad_norm": 6.1299618904758315, "learning_rate": 6.622967781999175e-06, "loss": 17.2798, "step": 22591 }, { "epoch": 0.412963606119875, "grad_norm": 6.8638793367227775, "learning_rate": 6.622687795577792e-06, "loss": 17.8593, "step": 22592 }, { "epoch": 0.4129818853163215, "grad_norm": 6.796283468696494, "learning_rate": 6.622407803469027e-06, "loss": 17.9025, "step": 22593 }, { "epoch": 0.413000164512768, "grad_norm": 7.761046709983375, "learning_rate": 6.622127805673863e-06, "loss": 17.8826, "step": 22594 }, { "epoch": 0.41301844370921453, "grad_norm": 5.004190494024626, "learning_rate": 6.621847802193282e-06, "loss": 17.0119, "step": 22595 }, { "epoch": 0.41303672290566107, "grad_norm": 7.200378132739967, "learning_rate": 6.621567793028265e-06, "loss": 17.9817, "step": 22596 }, { "epoch": 0.4130550021021076, "grad_norm": 5.330772260455991, "learning_rate": 6.621287778179795e-06, "loss": 17.1643, "step": 22597 }, { "epoch": 0.41307328129855414, "grad_norm": 6.535466779561626, "learning_rate": 6.621007757648852e-06, "loss": 17.729, "step": 22598 }, { "epoch": 0.4130915604950006, "grad_norm": 5.811170757742771, "learning_rate": 6.620727731436416e-06, "loss": 17.0422, "step": 22599 }, { "epoch": 0.41310983969144716, "grad_norm": 6.824860401716977, "learning_rate": 6.62044769954347e-06, "loss": 17.7706, "step": 22600 }, { "epoch": 0.4131281188878937, "grad_norm": 5.996010384567158, "learning_rate": 6.620167661970998e-06, "loss": 17.5312, "step": 22601 }, { "epoch": 0.4131463980843402, "grad_norm": 7.0419128847561, "learning_rate": 6.619887618719977e-06, "loss": 17.9146, "step": 22602 }, { "epoch": 0.41316467728078676, "grad_norm": 6.673918239797463, "learning_rate": 6.61960756979139e-06, "loss": 17.6691, "step": 22603 }, { "epoch": 0.41318295647723324, "grad_norm": 5.993621475464369, "learning_rate": 6.619327515186219e-06, "loss": 17.352, "step": 22604 }, { "epoch": 0.4132012356736798, "grad_norm": 6.092149993554768, "learning_rate": 6.619047454905446e-06, "loss": 17.5204, "step": 22605 }, { "epoch": 0.4132195148701263, "grad_norm": 6.303515551780367, "learning_rate": 6.618767388950052e-06, "loss": 17.4342, "step": 22606 }, { "epoch": 0.41323779406657285, "grad_norm": 7.357594247325871, "learning_rate": 6.6184873173210194e-06, "loss": 17.9901, "step": 22607 }, { "epoch": 0.41325607326301933, "grad_norm": 8.321893973483117, "learning_rate": 6.61820724001933e-06, "loss": 18.4359, "step": 22608 }, { "epoch": 0.41327435245946587, "grad_norm": 5.0807070169529585, "learning_rate": 6.6179271570459625e-06, "loss": 17.0118, "step": 22609 }, { "epoch": 0.4132926316559124, "grad_norm": 7.203675890097184, "learning_rate": 6.617647068401902e-06, "loss": 18.179, "step": 22610 }, { "epoch": 0.41331091085235894, "grad_norm": 5.735684038570406, "learning_rate": 6.61736697408813e-06, "loss": 17.46, "step": 22611 }, { "epoch": 0.4133291900488055, "grad_norm": 6.064406790187848, "learning_rate": 6.6170868741056235e-06, "loss": 17.3494, "step": 22612 }, { "epoch": 0.41334746924525195, "grad_norm": 6.664329183395076, "learning_rate": 6.616806768455371e-06, "loss": 17.6179, "step": 22613 }, { "epoch": 0.4133657484416985, "grad_norm": 6.3572287918374055, "learning_rate": 6.61652665713835e-06, "loss": 17.4921, "step": 22614 }, { "epoch": 0.413384027638145, "grad_norm": 6.907970743755561, "learning_rate": 6.616246540155544e-06, "loss": 17.6579, "step": 22615 }, { "epoch": 0.41340230683459156, "grad_norm": 6.736745296231206, "learning_rate": 6.615966417507933e-06, "loss": 17.8245, "step": 22616 }, { "epoch": 0.4134205860310381, "grad_norm": 6.102058842223797, "learning_rate": 6.615686289196501e-06, "loss": 17.5284, "step": 22617 }, { "epoch": 0.4134388652274846, "grad_norm": 6.820874290130761, "learning_rate": 6.615406155222228e-06, "loss": 17.5447, "step": 22618 }, { "epoch": 0.4134571444239311, "grad_norm": 6.451908386244689, "learning_rate": 6.615126015586097e-06, "loss": 17.7669, "step": 22619 }, { "epoch": 0.41347542362037765, "grad_norm": 8.368993765242212, "learning_rate": 6.614845870289089e-06, "loss": 18.1534, "step": 22620 }, { "epoch": 0.4134937028168242, "grad_norm": 6.6094965163326735, "learning_rate": 6.614565719332187e-06, "loss": 17.5565, "step": 22621 }, { "epoch": 0.4135119820132707, "grad_norm": 8.729927958239205, "learning_rate": 6.614285562716372e-06, "loss": 18.5272, "step": 22622 }, { "epoch": 0.4135302612097172, "grad_norm": 7.685832120660212, "learning_rate": 6.614005400442625e-06, "loss": 17.6718, "step": 22623 }, { "epoch": 0.41354854040616373, "grad_norm": 5.114941706725837, "learning_rate": 6.613725232511931e-06, "loss": 17.1031, "step": 22624 }, { "epoch": 0.41356681960261027, "grad_norm": 7.073382172132256, "learning_rate": 6.613445058925271e-06, "loss": 17.626, "step": 22625 }, { "epoch": 0.4135850987990568, "grad_norm": 6.521550957369006, "learning_rate": 6.613164879683622e-06, "loss": 17.5834, "step": 22626 }, { "epoch": 0.41360337799550334, "grad_norm": 5.709561474608981, "learning_rate": 6.612884694787973e-06, "loss": 17.4551, "step": 22627 }, { "epoch": 0.4136216571919498, "grad_norm": 6.014032259728497, "learning_rate": 6.612604504239304e-06, "loss": 17.3407, "step": 22628 }, { "epoch": 0.41363993638839636, "grad_norm": 6.536297970602517, "learning_rate": 6.612324308038595e-06, "loss": 17.4338, "step": 22629 }, { "epoch": 0.4136582155848429, "grad_norm": 6.439188938365965, "learning_rate": 6.612044106186829e-06, "loss": 17.4706, "step": 22630 }, { "epoch": 0.4136764947812894, "grad_norm": 6.860891132068323, "learning_rate": 6.611763898684989e-06, "loss": 17.84, "step": 22631 }, { "epoch": 0.41369477397773596, "grad_norm": 6.780814567751688, "learning_rate": 6.611483685534054e-06, "loss": 17.4697, "step": 22632 }, { "epoch": 0.41371305317418244, "grad_norm": 7.741936908099513, "learning_rate": 6.61120346673501e-06, "loss": 18.2421, "step": 22633 }, { "epoch": 0.413731332370629, "grad_norm": 8.460448600785817, "learning_rate": 6.610923242288838e-06, "loss": 18.2521, "step": 22634 }, { "epoch": 0.4137496115670755, "grad_norm": 9.804278378568787, "learning_rate": 6.6106430121965206e-06, "loss": 18.8881, "step": 22635 }, { "epoch": 0.41376789076352205, "grad_norm": 6.469584170545846, "learning_rate": 6.610362776459038e-06, "loss": 17.5042, "step": 22636 }, { "epoch": 0.4137861699599686, "grad_norm": 6.491743775339879, "learning_rate": 6.610082535077373e-06, "loss": 17.6091, "step": 22637 }, { "epoch": 0.41380444915641507, "grad_norm": 6.723904337174401, "learning_rate": 6.6098022880525114e-06, "loss": 17.8345, "step": 22638 }, { "epoch": 0.4138227283528616, "grad_norm": 6.348440816127864, "learning_rate": 6.609522035385429e-06, "loss": 17.4658, "step": 22639 }, { "epoch": 0.41384100754930814, "grad_norm": 6.6049482488077915, "learning_rate": 6.6092417770771135e-06, "loss": 17.7139, "step": 22640 }, { "epoch": 0.4138592867457547, "grad_norm": 7.664834529279627, "learning_rate": 6.608961513128544e-06, "loss": 18.1336, "step": 22641 }, { "epoch": 0.41387756594220115, "grad_norm": 8.67237503839013, "learning_rate": 6.608681243540706e-06, "loss": 18.5005, "step": 22642 }, { "epoch": 0.4138958451386477, "grad_norm": 6.90125030437421, "learning_rate": 6.608400968314578e-06, "loss": 17.5234, "step": 22643 }, { "epoch": 0.4139141243350942, "grad_norm": 6.710162254357177, "learning_rate": 6.608120687451144e-06, "loss": 17.7885, "step": 22644 }, { "epoch": 0.41393240353154076, "grad_norm": 7.94811464687793, "learning_rate": 6.607840400951387e-06, "loss": 18.3258, "step": 22645 }, { "epoch": 0.4139506827279873, "grad_norm": 6.864843794923079, "learning_rate": 6.60756010881629e-06, "loss": 17.9661, "step": 22646 }, { "epoch": 0.4139689619244338, "grad_norm": 5.348178509871798, "learning_rate": 6.607279811046834e-06, "loss": 17.0711, "step": 22647 }, { "epoch": 0.4139872411208803, "grad_norm": 5.843528393840664, "learning_rate": 6.6069995076440004e-06, "loss": 17.3442, "step": 22648 }, { "epoch": 0.41400552031732685, "grad_norm": 7.0493975084425164, "learning_rate": 6.606719198608775e-06, "loss": 17.751, "step": 22649 }, { "epoch": 0.4140237995137734, "grad_norm": 6.850352787803978, "learning_rate": 6.606438883942136e-06, "loss": 17.8465, "step": 22650 }, { "epoch": 0.4140420787102199, "grad_norm": 6.794192267071566, "learning_rate": 6.606158563645069e-06, "loss": 18.0543, "step": 22651 }, { "epoch": 0.4140603579066664, "grad_norm": 5.6260671342242246, "learning_rate": 6.605878237718557e-06, "loss": 17.2493, "step": 22652 }, { "epoch": 0.41407863710311293, "grad_norm": 5.133847299703579, "learning_rate": 6.605597906163579e-06, "loss": 17.0291, "step": 22653 }, { "epoch": 0.41409691629955947, "grad_norm": 6.321677314254794, "learning_rate": 6.60531756898112e-06, "loss": 17.601, "step": 22654 }, { "epoch": 0.414115195496006, "grad_norm": 7.713653231749315, "learning_rate": 6.605037226172164e-06, "loss": 18.0653, "step": 22655 }, { "epoch": 0.41413347469245254, "grad_norm": 7.827450559057984, "learning_rate": 6.60475687773769e-06, "loss": 18.1411, "step": 22656 }, { "epoch": 0.414151753888899, "grad_norm": 7.096833323457897, "learning_rate": 6.604476523678682e-06, "loss": 17.9774, "step": 22657 }, { "epoch": 0.41417003308534556, "grad_norm": 8.250085366544415, "learning_rate": 6.604196163996124e-06, "loss": 18.4161, "step": 22658 }, { "epoch": 0.4141883122817921, "grad_norm": 6.781771733149326, "learning_rate": 6.603915798690999e-06, "loss": 17.7022, "step": 22659 }, { "epoch": 0.41420659147823863, "grad_norm": 6.757840342237743, "learning_rate": 6.603635427764286e-06, "loss": 17.8375, "step": 22660 }, { "epoch": 0.41422487067468516, "grad_norm": 6.5419938792567, "learning_rate": 6.603355051216971e-06, "loss": 17.7905, "step": 22661 }, { "epoch": 0.41424314987113164, "grad_norm": 5.537596873236283, "learning_rate": 6.603074669050036e-06, "loss": 17.2593, "step": 22662 }, { "epoch": 0.4142614290675782, "grad_norm": 7.3491561295609324, "learning_rate": 6.602794281264462e-06, "loss": 17.7703, "step": 22663 }, { "epoch": 0.4142797082640247, "grad_norm": 6.3830916393239745, "learning_rate": 6.602513887861235e-06, "loss": 17.7261, "step": 22664 }, { "epoch": 0.41429798746047125, "grad_norm": 4.750540918325534, "learning_rate": 6.6022334888413345e-06, "loss": 16.8368, "step": 22665 }, { "epoch": 0.4143162666569178, "grad_norm": 6.487918736243572, "learning_rate": 6.601953084205745e-06, "loss": 17.3947, "step": 22666 }, { "epoch": 0.41433454585336427, "grad_norm": 8.488969122297817, "learning_rate": 6.601672673955449e-06, "loss": 17.4989, "step": 22667 }, { "epoch": 0.4143528250498108, "grad_norm": 5.959678194243318, "learning_rate": 6.601392258091429e-06, "loss": 17.2292, "step": 22668 }, { "epoch": 0.41437110424625734, "grad_norm": 6.752733173416814, "learning_rate": 6.60111183661467e-06, "loss": 17.5716, "step": 22669 }, { "epoch": 0.4143893834427039, "grad_norm": 7.461818859216035, "learning_rate": 6.600831409526152e-06, "loss": 18.3179, "step": 22670 }, { "epoch": 0.4144076626391504, "grad_norm": 6.52799862030602, "learning_rate": 6.6005509768268575e-06, "loss": 17.829, "step": 22671 }, { "epoch": 0.4144259418355969, "grad_norm": 8.700369849679246, "learning_rate": 6.60027053851777e-06, "loss": 18.6796, "step": 22672 }, { "epoch": 0.4144442210320434, "grad_norm": 6.722078435566233, "learning_rate": 6.599990094599875e-06, "loss": 17.6187, "step": 22673 }, { "epoch": 0.41446250022848996, "grad_norm": 8.761588404165707, "learning_rate": 6.599709645074154e-06, "loss": 18.3351, "step": 22674 }, { "epoch": 0.4144807794249365, "grad_norm": 5.366673880496607, "learning_rate": 6.599429189941589e-06, "loss": 16.8164, "step": 22675 }, { "epoch": 0.414499058621383, "grad_norm": 5.554265745940292, "learning_rate": 6.599148729203162e-06, "loss": 17.0912, "step": 22676 }, { "epoch": 0.4145173378178295, "grad_norm": 8.969641571116686, "learning_rate": 6.598868262859859e-06, "loss": 18.5551, "step": 22677 }, { "epoch": 0.41453561701427605, "grad_norm": 5.718663842826723, "learning_rate": 6.598587790912661e-06, "loss": 17.3056, "step": 22678 }, { "epoch": 0.4145538962107226, "grad_norm": 5.6854851969374405, "learning_rate": 6.598307313362552e-06, "loss": 17.4263, "step": 22679 }, { "epoch": 0.4145721754071691, "grad_norm": 7.126189282650898, "learning_rate": 6.598026830210513e-06, "loss": 17.6173, "step": 22680 }, { "epoch": 0.4145904546036156, "grad_norm": 6.867647480876624, "learning_rate": 6.597746341457531e-06, "loss": 17.8448, "step": 22681 }, { "epoch": 0.41460873380006213, "grad_norm": 7.245180556532616, "learning_rate": 6.597465847104585e-06, "loss": 17.9264, "step": 22682 }, { "epoch": 0.41462701299650867, "grad_norm": 5.698173578017259, "learning_rate": 6.597185347152661e-06, "loss": 17.2355, "step": 22683 }, { "epoch": 0.4146452921929552, "grad_norm": 7.678229885503245, "learning_rate": 6.596904841602741e-06, "loss": 18.1914, "step": 22684 }, { "epoch": 0.41466357138940174, "grad_norm": 6.946669557539084, "learning_rate": 6.596624330455805e-06, "loss": 17.7522, "step": 22685 }, { "epoch": 0.4146818505858482, "grad_norm": 8.112722763877004, "learning_rate": 6.596343813712843e-06, "loss": 18.1864, "step": 22686 }, { "epoch": 0.41470012978229476, "grad_norm": 7.077645673920443, "learning_rate": 6.5960632913748334e-06, "loss": 17.761, "step": 22687 }, { "epoch": 0.4147184089787413, "grad_norm": 5.948136834264524, "learning_rate": 6.595782763442759e-06, "loss": 17.4198, "step": 22688 }, { "epoch": 0.41473668817518783, "grad_norm": 5.479383944276968, "learning_rate": 6.595502229917608e-06, "loss": 17.3015, "step": 22689 }, { "epoch": 0.41475496737163436, "grad_norm": 5.630273377241858, "learning_rate": 6.595221690800356e-06, "loss": 17.2719, "step": 22690 }, { "epoch": 0.41477324656808084, "grad_norm": 7.345218275703665, "learning_rate": 6.594941146091993e-06, "loss": 18.4604, "step": 22691 }, { "epoch": 0.4147915257645274, "grad_norm": 5.511659026344551, "learning_rate": 6.594660595793498e-06, "loss": 17.2525, "step": 22692 }, { "epoch": 0.4148098049609739, "grad_norm": 5.954239280317081, "learning_rate": 6.5943800399058586e-06, "loss": 17.4721, "step": 22693 }, { "epoch": 0.41482808415742045, "grad_norm": 8.52838192229829, "learning_rate": 6.594099478430052e-06, "loss": 18.3876, "step": 22694 }, { "epoch": 0.414846363353867, "grad_norm": 5.610287645105502, "learning_rate": 6.593818911367067e-06, "loss": 17.1563, "step": 22695 }, { "epoch": 0.41486464255031347, "grad_norm": 5.801369980537789, "learning_rate": 6.593538338717885e-06, "loss": 17.0791, "step": 22696 }, { "epoch": 0.41488292174676, "grad_norm": 5.399986477203266, "learning_rate": 6.59325776048349e-06, "loss": 17.1426, "step": 22697 }, { "epoch": 0.41490120094320654, "grad_norm": 5.841316509387635, "learning_rate": 6.5929771766648646e-06, "loss": 17.4869, "step": 22698 }, { "epoch": 0.4149194801396531, "grad_norm": 7.118676761819252, "learning_rate": 6.59269658726299e-06, "loss": 18.0046, "step": 22699 }, { "epoch": 0.4149377593360996, "grad_norm": 6.532402113897623, "learning_rate": 6.592415992278855e-06, "loss": 17.5097, "step": 22700 }, { "epoch": 0.4149560385325461, "grad_norm": 5.9490599045365, "learning_rate": 6.59213539171344e-06, "loss": 17.435, "step": 22701 }, { "epoch": 0.4149743177289926, "grad_norm": 6.041935462363598, "learning_rate": 6.591854785567727e-06, "loss": 17.2972, "step": 22702 }, { "epoch": 0.41499259692543916, "grad_norm": 7.19247230647167, "learning_rate": 6.591574173842702e-06, "loss": 17.942, "step": 22703 }, { "epoch": 0.4150108761218857, "grad_norm": 6.734515529317652, "learning_rate": 6.591293556539348e-06, "loss": 17.7786, "step": 22704 }, { "epoch": 0.41502915531833223, "grad_norm": 6.395805860776399, "learning_rate": 6.591012933658647e-06, "loss": 17.2242, "step": 22705 }, { "epoch": 0.4150474345147787, "grad_norm": 7.439591270775899, "learning_rate": 6.5907323052015846e-06, "loss": 18.0942, "step": 22706 }, { "epoch": 0.41506571371122525, "grad_norm": 7.104558951426564, "learning_rate": 6.590451671169143e-06, "loss": 18.0514, "step": 22707 }, { "epoch": 0.4150839929076718, "grad_norm": 6.619138140788436, "learning_rate": 6.590171031562307e-06, "loss": 17.6459, "step": 22708 }, { "epoch": 0.4151022721041183, "grad_norm": 7.68805210518015, "learning_rate": 6.589890386382058e-06, "loss": 18.1722, "step": 22709 }, { "epoch": 0.4151205513005648, "grad_norm": 6.3781541007056, "learning_rate": 6.589609735629383e-06, "loss": 17.4187, "step": 22710 }, { "epoch": 0.41513883049701134, "grad_norm": 6.485791575507058, "learning_rate": 6.589329079305265e-06, "loss": 17.3851, "step": 22711 }, { "epoch": 0.41515710969345787, "grad_norm": 7.265329608831831, "learning_rate": 6.589048417410683e-06, "loss": 18.2836, "step": 22712 }, { "epoch": 0.4151753888899044, "grad_norm": 6.106993705205944, "learning_rate": 6.5887677499466255e-06, "loss": 17.276, "step": 22713 }, { "epoch": 0.41519366808635094, "grad_norm": 8.007759577422881, "learning_rate": 6.588487076914076e-06, "loss": 17.994, "step": 22714 }, { "epoch": 0.4152119472827974, "grad_norm": 6.874602017566855, "learning_rate": 6.588206398314017e-06, "loss": 17.7814, "step": 22715 }, { "epoch": 0.41523022647924396, "grad_norm": 7.192935282340736, "learning_rate": 6.58792571414743e-06, "loss": 17.6344, "step": 22716 }, { "epoch": 0.4152485056756905, "grad_norm": 7.48865060419624, "learning_rate": 6.587645024415304e-06, "loss": 17.8313, "step": 22717 }, { "epoch": 0.41526678487213703, "grad_norm": 6.263113228871017, "learning_rate": 6.587364329118619e-06, "loss": 17.3444, "step": 22718 }, { "epoch": 0.41528506406858356, "grad_norm": 7.208517383989723, "learning_rate": 6.587083628258358e-06, "loss": 17.7908, "step": 22719 }, { "epoch": 0.41530334326503004, "grad_norm": 8.204107080778781, "learning_rate": 6.586802921835509e-06, "loss": 18.3743, "step": 22720 }, { "epoch": 0.4153216224614766, "grad_norm": 6.759058520351196, "learning_rate": 6.586522209851053e-06, "loss": 17.6838, "step": 22721 }, { "epoch": 0.4153399016579231, "grad_norm": 5.367233973973414, "learning_rate": 6.586241492305974e-06, "loss": 17.1537, "step": 22722 }, { "epoch": 0.41535818085436965, "grad_norm": 5.641765597733447, "learning_rate": 6.585960769201256e-06, "loss": 17.2739, "step": 22723 }, { "epoch": 0.4153764600508162, "grad_norm": 5.603372240086101, "learning_rate": 6.585680040537884e-06, "loss": 17.1906, "step": 22724 }, { "epoch": 0.41539473924726267, "grad_norm": 6.509496739115115, "learning_rate": 6.58539930631684e-06, "loss": 17.7058, "step": 22725 }, { "epoch": 0.4154130184437092, "grad_norm": 7.650496191049718, "learning_rate": 6.585118566539108e-06, "loss": 17.7613, "step": 22726 }, { "epoch": 0.41543129764015574, "grad_norm": 6.774446900572114, "learning_rate": 6.584837821205675e-06, "loss": 18.0853, "step": 22727 }, { "epoch": 0.4154495768366023, "grad_norm": 8.229439411922518, "learning_rate": 6.584557070317523e-06, "loss": 18.2243, "step": 22728 }, { "epoch": 0.4154678560330488, "grad_norm": 5.692487977891022, "learning_rate": 6.584276313875635e-06, "loss": 17.2809, "step": 22729 }, { "epoch": 0.4154861352294953, "grad_norm": 7.541651090569733, "learning_rate": 6.583995551880996e-06, "loss": 17.8468, "step": 22730 }, { "epoch": 0.4155044144259418, "grad_norm": 7.593822362366229, "learning_rate": 6.58371478433459e-06, "loss": 17.8409, "step": 22731 }, { "epoch": 0.41552269362238836, "grad_norm": 6.341984989300643, "learning_rate": 6.5834340112374015e-06, "loss": 17.7052, "step": 22732 }, { "epoch": 0.4155409728188349, "grad_norm": 6.75870241976922, "learning_rate": 6.583153232590415e-06, "loss": 17.8381, "step": 22733 }, { "epoch": 0.41555925201528143, "grad_norm": 6.017906222199273, "learning_rate": 6.5828724483946124e-06, "loss": 17.5057, "step": 22734 }, { "epoch": 0.4155775312117279, "grad_norm": 6.575012576757038, "learning_rate": 6.58259165865098e-06, "loss": 17.4874, "step": 22735 }, { "epoch": 0.41559581040817445, "grad_norm": 6.8646238780668005, "learning_rate": 6.582310863360501e-06, "loss": 17.6847, "step": 22736 }, { "epoch": 0.415614089604621, "grad_norm": 6.004205502592466, "learning_rate": 6.58203006252416e-06, "loss": 17.261, "step": 22737 }, { "epoch": 0.4156323688010675, "grad_norm": 6.6996546292894354, "learning_rate": 6.581749256142941e-06, "loss": 17.5514, "step": 22738 }, { "epoch": 0.41565064799751406, "grad_norm": 5.002377555622047, "learning_rate": 6.581468444217827e-06, "loss": 16.9799, "step": 22739 }, { "epoch": 0.41566892719396054, "grad_norm": 5.442555686252805, "learning_rate": 6.581187626749803e-06, "loss": 17.1699, "step": 22740 }, { "epoch": 0.41568720639040707, "grad_norm": 5.511346881169742, "learning_rate": 6.580906803739855e-06, "loss": 17.1863, "step": 22741 }, { "epoch": 0.4157054855868536, "grad_norm": 5.8617931785889565, "learning_rate": 6.580625975188966e-06, "loss": 17.1286, "step": 22742 }, { "epoch": 0.41572376478330014, "grad_norm": 6.571820235000502, "learning_rate": 6.58034514109812e-06, "loss": 17.4238, "step": 22743 }, { "epoch": 0.4157420439797466, "grad_norm": 7.297712380885935, "learning_rate": 6.5800643014683e-06, "loss": 17.8347, "step": 22744 }, { "epoch": 0.41576032317619316, "grad_norm": 7.361179552002683, "learning_rate": 6.579783456300494e-06, "loss": 17.7443, "step": 22745 }, { "epoch": 0.4157786023726397, "grad_norm": 5.652602800604153, "learning_rate": 6.579502605595682e-06, "loss": 17.3332, "step": 22746 }, { "epoch": 0.41579688156908623, "grad_norm": 6.973782360010165, "learning_rate": 6.579221749354851e-06, "loss": 17.2906, "step": 22747 }, { "epoch": 0.41581516076553277, "grad_norm": 5.04874986374777, "learning_rate": 6.578940887578985e-06, "loss": 16.7602, "step": 22748 }, { "epoch": 0.41583343996197925, "grad_norm": 6.617958056138184, "learning_rate": 6.578660020269069e-06, "loss": 17.5798, "step": 22749 }, { "epoch": 0.4158517191584258, "grad_norm": 5.884115906716348, "learning_rate": 6.578379147426085e-06, "loss": 17.076, "step": 22750 }, { "epoch": 0.4158699983548723, "grad_norm": 5.746617761475577, "learning_rate": 6.5780982690510195e-06, "loss": 17.3955, "step": 22751 }, { "epoch": 0.41588827755131885, "grad_norm": 5.915119519887098, "learning_rate": 6.577817385144858e-06, "loss": 17.3731, "step": 22752 }, { "epoch": 0.4159065567477654, "grad_norm": 5.217123786820856, "learning_rate": 6.577536495708582e-06, "loss": 16.9362, "step": 22753 }, { "epoch": 0.41592483594421187, "grad_norm": 7.274446557520727, "learning_rate": 6.577255600743178e-06, "loss": 17.6814, "step": 22754 }, { "epoch": 0.4159431151406584, "grad_norm": 7.1672588174974035, "learning_rate": 6.57697470024963e-06, "loss": 17.6601, "step": 22755 }, { "epoch": 0.41596139433710494, "grad_norm": 5.915564077256306, "learning_rate": 6.5766937942289236e-06, "loss": 17.3083, "step": 22756 }, { "epoch": 0.4159796735335515, "grad_norm": 6.479994222430836, "learning_rate": 6.5764128826820404e-06, "loss": 17.5307, "step": 22757 }, { "epoch": 0.415997952729998, "grad_norm": 7.006907757023098, "learning_rate": 6.5761319656099665e-06, "loss": 17.5083, "step": 22758 }, { "epoch": 0.4160162319264445, "grad_norm": 6.909314029790856, "learning_rate": 6.575851043013688e-06, "loss": 17.5981, "step": 22759 }, { "epoch": 0.416034511122891, "grad_norm": 8.133590289608975, "learning_rate": 6.575570114894189e-06, "loss": 18.3469, "step": 22760 }, { "epoch": 0.41605279031933756, "grad_norm": 7.532132414079868, "learning_rate": 6.575289181252452e-06, "loss": 17.8717, "step": 22761 }, { "epoch": 0.4160710695157841, "grad_norm": 8.058429272216127, "learning_rate": 6.575008242089463e-06, "loss": 17.4815, "step": 22762 }, { "epoch": 0.41608934871223063, "grad_norm": 6.538101077384175, "learning_rate": 6.574727297406208e-06, "loss": 17.3078, "step": 22763 }, { "epoch": 0.4161076279086771, "grad_norm": 7.684220028388074, "learning_rate": 6.5744463472036705e-06, "loss": 17.8021, "step": 22764 }, { "epoch": 0.41612590710512365, "grad_norm": 6.244223985643519, "learning_rate": 6.574165391482834e-06, "loss": 17.3872, "step": 22765 }, { "epoch": 0.4161441863015702, "grad_norm": 6.987439625975591, "learning_rate": 6.573884430244686e-06, "loss": 17.4608, "step": 22766 }, { "epoch": 0.4161624654980167, "grad_norm": 6.891764479283446, "learning_rate": 6.573603463490208e-06, "loss": 17.8429, "step": 22767 }, { "epoch": 0.41618074469446326, "grad_norm": 7.666691658856379, "learning_rate": 6.573322491220387e-06, "loss": 17.587, "step": 22768 }, { "epoch": 0.41619902389090974, "grad_norm": 5.806290877069214, "learning_rate": 6.573041513436208e-06, "loss": 17.4055, "step": 22769 }, { "epoch": 0.41621730308735627, "grad_norm": 5.16293829467992, "learning_rate": 6.572760530138654e-06, "loss": 17.1096, "step": 22770 }, { "epoch": 0.4162355822838028, "grad_norm": 7.348799653268203, "learning_rate": 6.572479541328711e-06, "loss": 17.7816, "step": 22771 }, { "epoch": 0.41625386148024934, "grad_norm": 6.755542268569418, "learning_rate": 6.5721985470073635e-06, "loss": 17.3061, "step": 22772 }, { "epoch": 0.4162721406766959, "grad_norm": 7.217781189562275, "learning_rate": 6.571917547175598e-06, "loss": 18.0527, "step": 22773 }, { "epoch": 0.41629041987314236, "grad_norm": 6.833506095546124, "learning_rate": 6.571636541834396e-06, "loss": 17.7029, "step": 22774 }, { "epoch": 0.4163086990695889, "grad_norm": 5.849803002947453, "learning_rate": 6.571355530984746e-06, "loss": 17.3694, "step": 22775 }, { "epoch": 0.41632697826603543, "grad_norm": 5.765323378988717, "learning_rate": 6.571074514627629e-06, "loss": 17.3174, "step": 22776 }, { "epoch": 0.41634525746248197, "grad_norm": 6.690528133298416, "learning_rate": 6.570793492764033e-06, "loss": 17.779, "step": 22777 }, { "epoch": 0.41636353665892845, "grad_norm": 5.990251398528859, "learning_rate": 6.570512465394943e-06, "loss": 17.3365, "step": 22778 }, { "epoch": 0.416381815855375, "grad_norm": 7.25207053808171, "learning_rate": 6.570231432521344e-06, "loss": 17.7794, "step": 22779 }, { "epoch": 0.4164000950518215, "grad_norm": 5.545128358764208, "learning_rate": 6.5699503941442176e-06, "loss": 17.218, "step": 22780 }, { "epoch": 0.41641837424826805, "grad_norm": 7.852032475184119, "learning_rate": 6.569669350264553e-06, "loss": 17.7779, "step": 22781 }, { "epoch": 0.4164366534447146, "grad_norm": 7.142927567560632, "learning_rate": 6.569388300883332e-06, "loss": 17.912, "step": 22782 }, { "epoch": 0.41645493264116107, "grad_norm": 8.292464851180304, "learning_rate": 6.569107246001542e-06, "loss": 18.0269, "step": 22783 }, { "epoch": 0.4164732118376076, "grad_norm": 7.474122331540772, "learning_rate": 6.568826185620169e-06, "loss": 17.2395, "step": 22784 }, { "epoch": 0.41649149103405414, "grad_norm": 7.301032832584391, "learning_rate": 6.568545119740193e-06, "loss": 17.9761, "step": 22785 }, { "epoch": 0.4165097702305007, "grad_norm": 6.912074852409992, "learning_rate": 6.568264048362605e-06, "loss": 17.6828, "step": 22786 }, { "epoch": 0.4165280494269472, "grad_norm": 5.650365942879607, "learning_rate": 6.567982971488387e-06, "loss": 17.1882, "step": 22787 }, { "epoch": 0.4165463286233937, "grad_norm": 6.1543212301739105, "learning_rate": 6.5677018891185255e-06, "loss": 17.501, "step": 22788 }, { "epoch": 0.4165646078198402, "grad_norm": 5.830334058663455, "learning_rate": 6.567420801254003e-06, "loss": 17.3876, "step": 22789 }, { "epoch": 0.41658288701628676, "grad_norm": 6.832920466203908, "learning_rate": 6.567139707895808e-06, "loss": 17.5439, "step": 22790 }, { "epoch": 0.4166011662127333, "grad_norm": 7.110551234129028, "learning_rate": 6.566858609044924e-06, "loss": 17.7816, "step": 22791 }, { "epoch": 0.41661944540917983, "grad_norm": 7.744163628442694, "learning_rate": 6.5665775047023365e-06, "loss": 18.228, "step": 22792 }, { "epoch": 0.4166377246056263, "grad_norm": 8.667665539047851, "learning_rate": 6.566296394869032e-06, "loss": 18.431, "step": 22793 }, { "epoch": 0.41665600380207285, "grad_norm": 6.7923573073427415, "learning_rate": 6.566015279545991e-06, "loss": 17.6889, "step": 22794 }, { "epoch": 0.4166742829985194, "grad_norm": 6.236571850196767, "learning_rate": 6.565734158734205e-06, "loss": 17.5213, "step": 22795 }, { "epoch": 0.4166925621949659, "grad_norm": 6.12475517112729, "learning_rate": 6.565453032434657e-06, "loss": 17.6715, "step": 22796 }, { "epoch": 0.41671084139141246, "grad_norm": 5.582975984442605, "learning_rate": 6.5651719006483304e-06, "loss": 17.1853, "step": 22797 }, { "epoch": 0.41672912058785894, "grad_norm": 7.714019679154534, "learning_rate": 6.564890763376212e-06, "loss": 18.1422, "step": 22798 }, { "epoch": 0.4167473997843055, "grad_norm": 6.2442493922056475, "learning_rate": 6.564609620619289e-06, "loss": 17.3394, "step": 22799 }, { "epoch": 0.416765678980752, "grad_norm": 6.482635908781921, "learning_rate": 6.564328472378545e-06, "loss": 17.7275, "step": 22800 }, { "epoch": 0.41678395817719854, "grad_norm": 5.819707570598144, "learning_rate": 6.564047318654965e-06, "loss": 17.1874, "step": 22801 }, { "epoch": 0.4168022373736451, "grad_norm": 7.5473504574752734, "learning_rate": 6.563766159449534e-06, "loss": 18.2899, "step": 22802 }, { "epoch": 0.41682051657009156, "grad_norm": 9.299562735722224, "learning_rate": 6.563484994763238e-06, "loss": 18.1879, "step": 22803 }, { "epoch": 0.4168387957665381, "grad_norm": 6.709706486727593, "learning_rate": 6.563203824597064e-06, "loss": 17.5875, "step": 22804 }, { "epoch": 0.41685707496298463, "grad_norm": 6.495214829325099, "learning_rate": 6.562922648951997e-06, "loss": 17.5701, "step": 22805 }, { "epoch": 0.41687535415943117, "grad_norm": 6.4480004868956415, "learning_rate": 6.562641467829021e-06, "loss": 17.1504, "step": 22806 }, { "epoch": 0.4168936333558777, "grad_norm": 6.590807356068329, "learning_rate": 6.562360281229121e-06, "loss": 17.6114, "step": 22807 }, { "epoch": 0.4169119125523242, "grad_norm": 5.205104114154562, "learning_rate": 6.562079089153285e-06, "loss": 16.862, "step": 22808 }, { "epoch": 0.4169301917487707, "grad_norm": 7.197480642816911, "learning_rate": 6.561797891602496e-06, "loss": 18.1306, "step": 22809 }, { "epoch": 0.41694847094521725, "grad_norm": 8.322559946414186, "learning_rate": 6.561516688577743e-06, "loss": 18.0837, "step": 22810 }, { "epoch": 0.4169667501416638, "grad_norm": 6.383365600979941, "learning_rate": 6.561235480080008e-06, "loss": 17.2967, "step": 22811 }, { "epoch": 0.41698502933811027, "grad_norm": 5.639384376893032, "learning_rate": 6.560954266110278e-06, "loss": 17.1241, "step": 22812 }, { "epoch": 0.4170033085345568, "grad_norm": 7.4142106095428435, "learning_rate": 6.560673046669539e-06, "loss": 17.7709, "step": 22813 }, { "epoch": 0.41702158773100334, "grad_norm": 5.281954945023397, "learning_rate": 6.560391821758778e-06, "loss": 16.9594, "step": 22814 }, { "epoch": 0.4170398669274499, "grad_norm": 8.812066905483043, "learning_rate": 6.560110591378978e-06, "loss": 18.5953, "step": 22815 }, { "epoch": 0.4170581461238964, "grad_norm": 6.876947558740086, "learning_rate": 6.559829355531125e-06, "loss": 18.0454, "step": 22816 }, { "epoch": 0.4170764253203429, "grad_norm": 7.917584726064076, "learning_rate": 6.5595481142162055e-06, "loss": 17.8833, "step": 22817 }, { "epoch": 0.4170947045167894, "grad_norm": 5.738162967416068, "learning_rate": 6.559266867435207e-06, "loss": 17.3356, "step": 22818 }, { "epoch": 0.41711298371323596, "grad_norm": 6.922606770731424, "learning_rate": 6.558985615189112e-06, "loss": 17.8734, "step": 22819 }, { "epoch": 0.4171312629096825, "grad_norm": 8.63541636951145, "learning_rate": 6.5587043574789065e-06, "loss": 18.5934, "step": 22820 }, { "epoch": 0.41714954210612903, "grad_norm": 7.578627984517951, "learning_rate": 6.55842309430558e-06, "loss": 17.95, "step": 22821 }, { "epoch": 0.4171678213025755, "grad_norm": 7.858886689256134, "learning_rate": 6.558141825670114e-06, "loss": 18.0093, "step": 22822 }, { "epoch": 0.41718610049902205, "grad_norm": 7.001485042822444, "learning_rate": 6.5578605515734964e-06, "loss": 17.706, "step": 22823 }, { "epoch": 0.4172043796954686, "grad_norm": 6.834582829360226, "learning_rate": 6.557579272016714e-06, "loss": 17.5111, "step": 22824 }, { "epoch": 0.4172226588919151, "grad_norm": 8.267023838670658, "learning_rate": 6.55729798700075e-06, "loss": 18.0063, "step": 22825 }, { "epoch": 0.41724093808836166, "grad_norm": 6.703518452516018, "learning_rate": 6.557016696526592e-06, "loss": 18.0998, "step": 22826 }, { "epoch": 0.41725921728480814, "grad_norm": 8.303254247812571, "learning_rate": 6.556735400595225e-06, "loss": 18.0793, "step": 22827 }, { "epoch": 0.4172774964812547, "grad_norm": 6.049871087523901, "learning_rate": 6.556454099207638e-06, "loss": 17.2825, "step": 22828 }, { "epoch": 0.4172957756777012, "grad_norm": 7.0593863672495205, "learning_rate": 6.5561727923648124e-06, "loss": 17.8252, "step": 22829 }, { "epoch": 0.41731405487414774, "grad_norm": 7.749978588804397, "learning_rate": 6.555891480067736e-06, "loss": 18.0291, "step": 22830 }, { "epoch": 0.4173323340705943, "grad_norm": 6.377548209145335, "learning_rate": 6.5556101623173966e-06, "loss": 17.8959, "step": 22831 }, { "epoch": 0.41735061326704076, "grad_norm": 7.416872017395103, "learning_rate": 6.555328839114776e-06, "loss": 18.2488, "step": 22832 }, { "epoch": 0.4173688924634873, "grad_norm": 5.627472921197626, "learning_rate": 6.555047510460866e-06, "loss": 17.2994, "step": 22833 }, { "epoch": 0.41738717165993383, "grad_norm": 7.142209636209356, "learning_rate": 6.554766176356646e-06, "loss": 17.8598, "step": 22834 }, { "epoch": 0.41740545085638037, "grad_norm": 6.156923175015818, "learning_rate": 6.554484836803108e-06, "loss": 17.411, "step": 22835 }, { "epoch": 0.4174237300528269, "grad_norm": 7.262796757388549, "learning_rate": 6.554203491801235e-06, "loss": 17.9443, "step": 22836 }, { "epoch": 0.4174420092492734, "grad_norm": 6.31230909615951, "learning_rate": 6.553922141352012e-06, "loss": 17.5397, "step": 22837 }, { "epoch": 0.4174602884457199, "grad_norm": 5.799852278081229, "learning_rate": 6.55364078545643e-06, "loss": 17.0805, "step": 22838 }, { "epoch": 0.41747856764216645, "grad_norm": 6.628159449081681, "learning_rate": 6.553359424115468e-06, "loss": 17.6692, "step": 22839 }, { "epoch": 0.417496846838613, "grad_norm": 6.820717034896563, "learning_rate": 6.553078057330118e-06, "loss": 17.6101, "step": 22840 }, { "epoch": 0.4175151260350595, "grad_norm": 6.089442215659412, "learning_rate": 6.552796685101364e-06, "loss": 17.4498, "step": 22841 }, { "epoch": 0.417533405231506, "grad_norm": 6.499586675908634, "learning_rate": 6.552515307430194e-06, "loss": 17.801, "step": 22842 }, { "epoch": 0.41755168442795254, "grad_norm": 5.901833702082617, "learning_rate": 6.55223392431759e-06, "loss": 17.4328, "step": 22843 }, { "epoch": 0.4175699636243991, "grad_norm": 8.705715413606889, "learning_rate": 6.551952535764541e-06, "loss": 18.5282, "step": 22844 }, { "epoch": 0.4175882428208456, "grad_norm": 6.320404062671469, "learning_rate": 6.5516711417720355e-06, "loss": 17.5892, "step": 22845 }, { "epoch": 0.4176065220172921, "grad_norm": 7.178860213931443, "learning_rate": 6.551389742341055e-06, "loss": 17.9891, "step": 22846 }, { "epoch": 0.41762480121373863, "grad_norm": 7.656582368173924, "learning_rate": 6.551108337472589e-06, "loss": 18.0129, "step": 22847 }, { "epoch": 0.41764308041018516, "grad_norm": 6.109532819932893, "learning_rate": 6.550826927167623e-06, "loss": 17.4247, "step": 22848 }, { "epoch": 0.4176613596066317, "grad_norm": 6.092443474453851, "learning_rate": 6.5505455114271424e-06, "loss": 17.3379, "step": 22849 }, { "epoch": 0.41767963880307823, "grad_norm": 6.133743134607131, "learning_rate": 6.550264090252134e-06, "loss": 17.7763, "step": 22850 }, { "epoch": 0.4176979179995247, "grad_norm": 8.763945002107015, "learning_rate": 6.549982663643586e-06, "loss": 18.1602, "step": 22851 }, { "epoch": 0.41771619719597125, "grad_norm": 6.210400200575394, "learning_rate": 6.549701231602484e-06, "loss": 17.2319, "step": 22852 }, { "epoch": 0.4177344763924178, "grad_norm": 6.661533010185124, "learning_rate": 6.54941979412981e-06, "loss": 17.7869, "step": 22853 }, { "epoch": 0.4177527555888643, "grad_norm": 6.898743035864414, "learning_rate": 6.549138351226556e-06, "loss": 17.7566, "step": 22854 }, { "epoch": 0.41777103478531086, "grad_norm": 6.548996978112739, "learning_rate": 6.548856902893708e-06, "loss": 17.5209, "step": 22855 }, { "epoch": 0.41778931398175734, "grad_norm": 7.238878738760886, "learning_rate": 6.5485754491322494e-06, "loss": 17.9588, "step": 22856 }, { "epoch": 0.4178075931782039, "grad_norm": 6.436757042608861, "learning_rate": 6.548293989943168e-06, "loss": 17.5837, "step": 22857 }, { "epoch": 0.4178258723746504, "grad_norm": 5.661168070102067, "learning_rate": 6.5480125253274505e-06, "loss": 17.3673, "step": 22858 }, { "epoch": 0.41784415157109694, "grad_norm": 6.605295512255456, "learning_rate": 6.547731055286085e-06, "loss": 17.3872, "step": 22859 }, { "epoch": 0.4178624307675435, "grad_norm": 7.909887497474025, "learning_rate": 6.5474495798200555e-06, "loss": 18.1409, "step": 22860 }, { "epoch": 0.41788070996398996, "grad_norm": 6.512755013831273, "learning_rate": 6.5471680989303495e-06, "loss": 17.5697, "step": 22861 }, { "epoch": 0.4178989891604365, "grad_norm": 5.761497015408156, "learning_rate": 6.546886612617953e-06, "loss": 17.1833, "step": 22862 }, { "epoch": 0.41791726835688303, "grad_norm": 5.362426299524855, "learning_rate": 6.546605120883854e-06, "loss": 17.2209, "step": 22863 }, { "epoch": 0.41793554755332957, "grad_norm": 5.837311405647309, "learning_rate": 6.546323623729038e-06, "loss": 17.453, "step": 22864 }, { "epoch": 0.4179538267497761, "grad_norm": 7.939985383396437, "learning_rate": 6.546042121154492e-06, "loss": 17.9857, "step": 22865 }, { "epoch": 0.4179721059462226, "grad_norm": 6.930859774128972, "learning_rate": 6.545760613161202e-06, "loss": 17.5704, "step": 22866 }, { "epoch": 0.4179903851426691, "grad_norm": 6.103396396617472, "learning_rate": 6.545479099750156e-06, "loss": 17.1374, "step": 22867 }, { "epoch": 0.41800866433911565, "grad_norm": 5.99368123074367, "learning_rate": 6.545197580922339e-06, "loss": 17.5591, "step": 22868 }, { "epoch": 0.4180269435355622, "grad_norm": 6.182348943063241, "learning_rate": 6.54491605667874e-06, "loss": 17.5682, "step": 22869 }, { "epoch": 0.4180452227320087, "grad_norm": 6.729086066286519, "learning_rate": 6.544634527020343e-06, "loss": 17.3765, "step": 22870 }, { "epoch": 0.4180635019284552, "grad_norm": 7.225283574756662, "learning_rate": 6.5443529919481355e-06, "loss": 17.8946, "step": 22871 }, { "epoch": 0.41808178112490174, "grad_norm": 7.6669910653718825, "learning_rate": 6.5440714514631056e-06, "loss": 18.1302, "step": 22872 }, { "epoch": 0.4181000603213483, "grad_norm": 5.850875944637185, "learning_rate": 6.54378990556624e-06, "loss": 17.4011, "step": 22873 }, { "epoch": 0.4181183395177948, "grad_norm": 7.086829435337445, "learning_rate": 6.5435083542585235e-06, "loss": 17.5344, "step": 22874 }, { "epoch": 0.41813661871424135, "grad_norm": 7.1525906763665885, "learning_rate": 6.543226797540945e-06, "loss": 18.6293, "step": 22875 }, { "epoch": 0.41815489791068783, "grad_norm": 7.118951815704408, "learning_rate": 6.542945235414489e-06, "loss": 17.6044, "step": 22876 }, { "epoch": 0.41817317710713436, "grad_norm": 6.265518363771976, "learning_rate": 6.542663667880145e-06, "loss": 17.3717, "step": 22877 }, { "epoch": 0.4181914563035809, "grad_norm": 6.87913448403044, "learning_rate": 6.5423820949388995e-06, "loss": 17.4798, "step": 22878 }, { "epoch": 0.41820973550002744, "grad_norm": 5.81084269600853, "learning_rate": 6.542100516591737e-06, "loss": 17.2091, "step": 22879 }, { "epoch": 0.4182280146964739, "grad_norm": 6.3975723906987625, "learning_rate": 6.541818932839646e-06, "loss": 17.7207, "step": 22880 }, { "epoch": 0.41824629389292045, "grad_norm": 6.452800317594072, "learning_rate": 6.541537343683615e-06, "loss": 17.413, "step": 22881 }, { "epoch": 0.418264573089367, "grad_norm": 6.346871107329755, "learning_rate": 6.541255749124629e-06, "loss": 17.3771, "step": 22882 }, { "epoch": 0.4182828522858135, "grad_norm": 6.923827081749899, "learning_rate": 6.5409741491636746e-06, "loss": 17.3506, "step": 22883 }, { "epoch": 0.41830113148226006, "grad_norm": 7.067361672916809, "learning_rate": 6.54069254380174e-06, "loss": 17.9107, "step": 22884 }, { "epoch": 0.41831941067870654, "grad_norm": 6.980080056759994, "learning_rate": 6.54041093303981e-06, "loss": 17.8602, "step": 22885 }, { "epoch": 0.4183376898751531, "grad_norm": 7.087277520644656, "learning_rate": 6.540129316878876e-06, "loss": 17.9097, "step": 22886 }, { "epoch": 0.4183559690715996, "grad_norm": 5.600155285651116, "learning_rate": 6.539847695319922e-06, "loss": 17.1198, "step": 22887 }, { "epoch": 0.41837424826804614, "grad_norm": 6.921820599430229, "learning_rate": 6.539566068363934e-06, "loss": 17.7032, "step": 22888 }, { "epoch": 0.4183925274644927, "grad_norm": 7.571194767346009, "learning_rate": 6.539284436011901e-06, "loss": 17.7819, "step": 22889 }, { "epoch": 0.41841080666093916, "grad_norm": 6.197019217095146, "learning_rate": 6.539002798264811e-06, "loss": 17.2801, "step": 22890 }, { "epoch": 0.4184290858573857, "grad_norm": 6.59662714682823, "learning_rate": 6.5387211551236485e-06, "loss": 17.7627, "step": 22891 }, { "epoch": 0.41844736505383223, "grad_norm": 5.962347021512397, "learning_rate": 6.538439506589401e-06, "loss": 17.2985, "step": 22892 }, { "epoch": 0.41846564425027877, "grad_norm": 6.084208894677302, "learning_rate": 6.538157852663059e-06, "loss": 17.3511, "step": 22893 }, { "epoch": 0.4184839234467253, "grad_norm": 8.792003026134429, "learning_rate": 6.537876193345605e-06, "loss": 18.112, "step": 22894 }, { "epoch": 0.4185022026431718, "grad_norm": 5.730695756311766, "learning_rate": 6.537594528638028e-06, "loss": 17.2505, "step": 22895 }, { "epoch": 0.4185204818396183, "grad_norm": 5.973897008972709, "learning_rate": 6.537312858541317e-06, "loss": 17.3319, "step": 22896 }, { "epoch": 0.41853876103606485, "grad_norm": 5.26638274908141, "learning_rate": 6.537031183056459e-06, "loss": 17.4098, "step": 22897 }, { "epoch": 0.4185570402325114, "grad_norm": 5.63597513759987, "learning_rate": 6.536749502184437e-06, "loss": 17.3671, "step": 22898 }, { "epoch": 0.4185753194289579, "grad_norm": 6.58035156249633, "learning_rate": 6.536467815926243e-06, "loss": 17.7771, "step": 22899 }, { "epoch": 0.4185935986254044, "grad_norm": 7.699676029301902, "learning_rate": 6.5361861242828635e-06, "loss": 18.0079, "step": 22900 }, { "epoch": 0.41861187782185094, "grad_norm": 5.7441541297691785, "learning_rate": 6.535904427255284e-06, "loss": 17.2305, "step": 22901 }, { "epoch": 0.4186301570182975, "grad_norm": 7.929346601580015, "learning_rate": 6.535622724844492e-06, "loss": 17.9838, "step": 22902 }, { "epoch": 0.418648436214744, "grad_norm": 5.295553888626519, "learning_rate": 6.535341017051477e-06, "loss": 17.1963, "step": 22903 }, { "epoch": 0.41866671541119055, "grad_norm": 9.134796239102027, "learning_rate": 6.535059303877224e-06, "loss": 17.7302, "step": 22904 }, { "epoch": 0.41868499460763703, "grad_norm": 7.049232928319126, "learning_rate": 6.534777585322722e-06, "loss": 17.7632, "step": 22905 }, { "epoch": 0.41870327380408356, "grad_norm": 6.862135989837325, "learning_rate": 6.5344958613889575e-06, "loss": 17.8258, "step": 22906 }, { "epoch": 0.4187215530005301, "grad_norm": 7.14700917026873, "learning_rate": 6.534214132076918e-06, "loss": 17.7554, "step": 22907 }, { "epoch": 0.41873983219697664, "grad_norm": 6.623554884593093, "learning_rate": 6.533932397387591e-06, "loss": 17.637, "step": 22908 }, { "epoch": 0.41875811139342317, "grad_norm": 7.292976780208033, "learning_rate": 6.533650657321965e-06, "loss": 18.3185, "step": 22909 }, { "epoch": 0.41877639058986965, "grad_norm": 6.2192238774027775, "learning_rate": 6.5333689118810265e-06, "loss": 17.295, "step": 22910 }, { "epoch": 0.4187946697863162, "grad_norm": 6.987670920154048, "learning_rate": 6.533087161065762e-06, "loss": 17.6673, "step": 22911 }, { "epoch": 0.4188129489827627, "grad_norm": 6.243765544688094, "learning_rate": 6.5328054048771594e-06, "loss": 17.4971, "step": 22912 }, { "epoch": 0.41883122817920926, "grad_norm": 7.625280996094554, "learning_rate": 6.5325236433162084e-06, "loss": 18.003, "step": 22913 }, { "epoch": 0.41884950737565574, "grad_norm": 6.994930904578712, "learning_rate": 6.5322418763838954e-06, "loss": 17.5114, "step": 22914 }, { "epoch": 0.4188677865721023, "grad_norm": 7.214030486165409, "learning_rate": 6.531960104081206e-06, "loss": 17.4588, "step": 22915 }, { "epoch": 0.4188860657685488, "grad_norm": 6.766063991825144, "learning_rate": 6.53167832640913e-06, "loss": 17.5853, "step": 22916 }, { "epoch": 0.41890434496499535, "grad_norm": 5.607349715613173, "learning_rate": 6.531396543368653e-06, "loss": 17.0989, "step": 22917 }, { "epoch": 0.4189226241614419, "grad_norm": 6.568315322497222, "learning_rate": 6.531114754960767e-06, "loss": 17.6328, "step": 22918 }, { "epoch": 0.41894090335788836, "grad_norm": 6.6660130541892455, "learning_rate": 6.5308329611864555e-06, "loss": 17.5281, "step": 22919 }, { "epoch": 0.4189591825543349, "grad_norm": 5.385308447069871, "learning_rate": 6.5305511620467065e-06, "loss": 17.34, "step": 22920 }, { "epoch": 0.41897746175078143, "grad_norm": 5.628795621941321, "learning_rate": 6.530269357542509e-06, "loss": 16.9445, "step": 22921 }, { "epoch": 0.41899574094722797, "grad_norm": 6.669207417037738, "learning_rate": 6.52998754767485e-06, "loss": 17.6181, "step": 22922 }, { "epoch": 0.4190140201436745, "grad_norm": 7.998858964954297, "learning_rate": 6.529705732444716e-06, "loss": 17.9669, "step": 22923 }, { "epoch": 0.419032299340121, "grad_norm": 8.60196555676147, "learning_rate": 6.529423911853099e-06, "loss": 18.0825, "step": 22924 }, { "epoch": 0.4190505785365675, "grad_norm": 6.788464748694883, "learning_rate": 6.529142085900981e-06, "loss": 17.6837, "step": 22925 }, { "epoch": 0.41906885773301406, "grad_norm": 6.576786036431059, "learning_rate": 6.528860254589356e-06, "loss": 17.4223, "step": 22926 }, { "epoch": 0.4190871369294606, "grad_norm": 6.463190432849364, "learning_rate": 6.528578417919206e-06, "loss": 17.5257, "step": 22927 }, { "epoch": 0.4191054161259071, "grad_norm": 6.992534174268173, "learning_rate": 6.528296575891523e-06, "loss": 17.8828, "step": 22928 }, { "epoch": 0.4191236953223536, "grad_norm": 7.223467350509907, "learning_rate": 6.5280147285072915e-06, "loss": 17.7576, "step": 22929 }, { "epoch": 0.41914197451880014, "grad_norm": 6.330170602348665, "learning_rate": 6.527732875767501e-06, "loss": 17.417, "step": 22930 }, { "epoch": 0.4191602537152467, "grad_norm": 6.005615070281294, "learning_rate": 6.527451017673141e-06, "loss": 17.3151, "step": 22931 }, { "epoch": 0.4191785329116932, "grad_norm": 6.5658764577434, "learning_rate": 6.527169154225196e-06, "loss": 17.2052, "step": 22932 }, { "epoch": 0.41919681210813975, "grad_norm": 7.586881698945212, "learning_rate": 6.526887285424657e-06, "loss": 18.1262, "step": 22933 }, { "epoch": 0.41921509130458623, "grad_norm": 8.520250697467581, "learning_rate": 6.526605411272509e-06, "loss": 18.5186, "step": 22934 }, { "epoch": 0.41923337050103276, "grad_norm": 6.0149379766618445, "learning_rate": 6.5263235317697425e-06, "loss": 17.394, "step": 22935 }, { "epoch": 0.4192516496974793, "grad_norm": 7.677557024289154, "learning_rate": 6.526041646917344e-06, "loss": 18.3367, "step": 22936 }, { "epoch": 0.41926992889392584, "grad_norm": 8.267497217504378, "learning_rate": 6.525759756716302e-06, "loss": 18.0889, "step": 22937 }, { "epoch": 0.41928820809037237, "grad_norm": 6.730750142506911, "learning_rate": 6.525477861167606e-06, "loss": 17.9213, "step": 22938 }, { "epoch": 0.41930648728681885, "grad_norm": 5.9702144330438385, "learning_rate": 6.52519596027224e-06, "loss": 17.4348, "step": 22939 }, { "epoch": 0.4193247664832654, "grad_norm": 6.446330953569598, "learning_rate": 6.524914054031195e-06, "loss": 17.8092, "step": 22940 }, { "epoch": 0.4193430456797119, "grad_norm": 6.5551284079364756, "learning_rate": 6.52463214244546e-06, "loss": 17.7906, "step": 22941 }, { "epoch": 0.41936132487615846, "grad_norm": 5.762021855641645, "learning_rate": 6.524350225516022e-06, "loss": 17.2061, "step": 22942 }, { "epoch": 0.419379604072605, "grad_norm": 7.679490353746419, "learning_rate": 6.5240683032438665e-06, "loss": 17.8234, "step": 22943 }, { "epoch": 0.4193978832690515, "grad_norm": 5.921225542284149, "learning_rate": 6.5237863756299845e-06, "loss": 17.2429, "step": 22944 }, { "epoch": 0.419416162465498, "grad_norm": 7.252687262905585, "learning_rate": 6.523504442675366e-06, "loss": 17.5584, "step": 22945 }, { "epoch": 0.41943444166194455, "grad_norm": 5.835753477697853, "learning_rate": 6.523222504380994e-06, "loss": 17.2518, "step": 22946 }, { "epoch": 0.4194527208583911, "grad_norm": 5.739807876177038, "learning_rate": 6.522940560747859e-06, "loss": 17.1743, "step": 22947 }, { "epoch": 0.41947100005483756, "grad_norm": 7.819202206090441, "learning_rate": 6.5226586117769504e-06, "loss": 18.5037, "step": 22948 }, { "epoch": 0.4194892792512841, "grad_norm": 6.0028511634436015, "learning_rate": 6.522376657469256e-06, "loss": 17.5147, "step": 22949 }, { "epoch": 0.41950755844773063, "grad_norm": 5.319944123196091, "learning_rate": 6.522094697825763e-06, "loss": 16.9886, "step": 22950 }, { "epoch": 0.41952583764417717, "grad_norm": 5.881423393874906, "learning_rate": 6.52181273284746e-06, "loss": 17.3217, "step": 22951 }, { "epoch": 0.4195441168406237, "grad_norm": 5.815097478620257, "learning_rate": 6.521530762535336e-06, "loss": 17.3631, "step": 22952 }, { "epoch": 0.4195623960370702, "grad_norm": 5.740822827179228, "learning_rate": 6.521248786890377e-06, "loss": 17.2605, "step": 22953 }, { "epoch": 0.4195806752335167, "grad_norm": 6.358788451252137, "learning_rate": 6.5209668059135755e-06, "loss": 17.606, "step": 22954 }, { "epoch": 0.41959895442996326, "grad_norm": 5.87558205935959, "learning_rate": 6.520684819605917e-06, "loss": 17.2332, "step": 22955 }, { "epoch": 0.4196172336264098, "grad_norm": 6.093464300381178, "learning_rate": 6.520402827968389e-06, "loss": 17.5618, "step": 22956 }, { "epoch": 0.4196355128228563, "grad_norm": 5.730160457463107, "learning_rate": 6.5201208310019815e-06, "loss": 17.323, "step": 22957 }, { "epoch": 0.4196537920193028, "grad_norm": 6.346543923820535, "learning_rate": 6.51983882870768e-06, "loss": 17.4396, "step": 22958 }, { "epoch": 0.41967207121574934, "grad_norm": 7.25686662683186, "learning_rate": 6.519556821086479e-06, "loss": 17.8619, "step": 22959 }, { "epoch": 0.4196903504121959, "grad_norm": 6.531134559914157, "learning_rate": 6.519274808139362e-06, "loss": 17.317, "step": 22960 }, { "epoch": 0.4197086296086424, "grad_norm": 17.641543513914314, "learning_rate": 6.5189927898673174e-06, "loss": 18.4942, "step": 22961 }, { "epoch": 0.41972690880508895, "grad_norm": 7.338485644450107, "learning_rate": 6.518710766271337e-06, "loss": 17.8047, "step": 22962 }, { "epoch": 0.41974518800153543, "grad_norm": 7.69381003070154, "learning_rate": 6.518428737352406e-06, "loss": 18.1808, "step": 22963 }, { "epoch": 0.41976346719798197, "grad_norm": 5.71042703009896, "learning_rate": 6.518146703111513e-06, "loss": 17.1054, "step": 22964 }, { "epoch": 0.4197817463944285, "grad_norm": 5.663902545869665, "learning_rate": 6.517864663549649e-06, "loss": 17.2465, "step": 22965 }, { "epoch": 0.41980002559087504, "grad_norm": 6.541940914774427, "learning_rate": 6.5175826186678e-06, "loss": 17.487, "step": 22966 }, { "epoch": 0.4198183047873216, "grad_norm": 5.950104287109007, "learning_rate": 6.517300568466956e-06, "loss": 17.3955, "step": 22967 }, { "epoch": 0.41983658398376805, "grad_norm": 6.896729789912742, "learning_rate": 6.517018512948106e-06, "loss": 17.9357, "step": 22968 }, { "epoch": 0.4198548631802146, "grad_norm": 5.469552240937213, "learning_rate": 6.516736452112238e-06, "loss": 17.0821, "step": 22969 }, { "epoch": 0.4198731423766611, "grad_norm": 4.997359057882777, "learning_rate": 6.51645438596034e-06, "loss": 16.9295, "step": 22970 }, { "epoch": 0.41989142157310766, "grad_norm": 5.3222105377112765, "learning_rate": 6.516172314493399e-06, "loss": 17.0438, "step": 22971 }, { "epoch": 0.4199097007695542, "grad_norm": 5.246128332099832, "learning_rate": 6.515890237712408e-06, "loss": 16.9717, "step": 22972 }, { "epoch": 0.4199279799660007, "grad_norm": 5.515928106494591, "learning_rate": 6.515608155618353e-06, "loss": 17.2534, "step": 22973 }, { "epoch": 0.4199462591624472, "grad_norm": 8.869645870466277, "learning_rate": 6.515326068212222e-06, "loss": 17.8627, "step": 22974 }, { "epoch": 0.41996453835889375, "grad_norm": 6.34706392084448, "learning_rate": 6.515043975495005e-06, "loss": 17.3602, "step": 22975 }, { "epoch": 0.4199828175553403, "grad_norm": 9.963344186945106, "learning_rate": 6.5147618774676905e-06, "loss": 18.176, "step": 22976 }, { "epoch": 0.4200010967517868, "grad_norm": 5.851374040972541, "learning_rate": 6.514479774131266e-06, "loss": 17.4657, "step": 22977 }, { "epoch": 0.4200193759482333, "grad_norm": 4.702057656452562, "learning_rate": 6.514197665486723e-06, "loss": 16.7217, "step": 22978 }, { "epoch": 0.42003765514467983, "grad_norm": 5.870093732208084, "learning_rate": 6.513915551535047e-06, "loss": 17.2361, "step": 22979 }, { "epoch": 0.42005593434112637, "grad_norm": 5.632922764412533, "learning_rate": 6.513633432277229e-06, "loss": 17.2576, "step": 22980 }, { "epoch": 0.4200742135375729, "grad_norm": 8.060190828372876, "learning_rate": 6.513351307714257e-06, "loss": 18.1932, "step": 22981 }, { "epoch": 0.4200924927340194, "grad_norm": 6.025354070894363, "learning_rate": 6.51306917784712e-06, "loss": 17.4459, "step": 22982 }, { "epoch": 0.4201107719304659, "grad_norm": 6.708815475795794, "learning_rate": 6.512787042676808e-06, "loss": 17.6669, "step": 22983 }, { "epoch": 0.42012905112691246, "grad_norm": 7.122797819201618, "learning_rate": 6.512504902204309e-06, "loss": 17.6847, "step": 22984 }, { "epoch": 0.420147330323359, "grad_norm": 5.589616164174937, "learning_rate": 6.512222756430609e-06, "loss": 17.0379, "step": 22985 }, { "epoch": 0.4201656095198055, "grad_norm": 6.83522881316481, "learning_rate": 6.5119406053567e-06, "loss": 17.559, "step": 22986 }, { "epoch": 0.420183888716252, "grad_norm": 7.503206816650065, "learning_rate": 6.511658448983572e-06, "loss": 17.8493, "step": 22987 }, { "epoch": 0.42020216791269854, "grad_norm": 6.789844118966189, "learning_rate": 6.511376287312212e-06, "loss": 17.7367, "step": 22988 }, { "epoch": 0.4202204471091451, "grad_norm": 7.627172290430774, "learning_rate": 6.511094120343608e-06, "loss": 17.6574, "step": 22989 }, { "epoch": 0.4202387263055916, "grad_norm": 7.249181799702012, "learning_rate": 6.510811948078751e-06, "loss": 17.883, "step": 22990 }, { "epoch": 0.42025700550203815, "grad_norm": 6.714112096450881, "learning_rate": 6.51052977051863e-06, "loss": 17.7014, "step": 22991 }, { "epoch": 0.42027528469848463, "grad_norm": 6.294913210124984, "learning_rate": 6.510247587664231e-06, "loss": 17.3123, "step": 22992 }, { "epoch": 0.42029356389493117, "grad_norm": 7.10322660420361, "learning_rate": 6.509965399516547e-06, "loss": 17.5675, "step": 22993 }, { "epoch": 0.4203118430913777, "grad_norm": 9.548721430594611, "learning_rate": 6.509683206076565e-06, "loss": 18.3707, "step": 22994 }, { "epoch": 0.42033012228782424, "grad_norm": 5.962714972520607, "learning_rate": 6.509401007345275e-06, "loss": 17.2836, "step": 22995 }, { "epoch": 0.4203484014842708, "grad_norm": 6.639399571622893, "learning_rate": 6.509118803323664e-06, "loss": 17.5949, "step": 22996 }, { "epoch": 0.42036668068071725, "grad_norm": 7.976306646430064, "learning_rate": 6.508836594012724e-06, "loss": 18.7225, "step": 22997 }, { "epoch": 0.4203849598771638, "grad_norm": 6.963961481132685, "learning_rate": 6.508554379413441e-06, "loss": 17.7807, "step": 22998 }, { "epoch": 0.4204032390736103, "grad_norm": 6.820626992447245, "learning_rate": 6.508272159526807e-06, "loss": 17.661, "step": 22999 }, { "epoch": 0.42042151827005686, "grad_norm": 5.6887619432817855, "learning_rate": 6.507989934353811e-06, "loss": 17.4962, "step": 23000 }, { "epoch": 0.4204397974665034, "grad_norm": 6.520621347198876, "learning_rate": 6.507707703895441e-06, "loss": 17.683, "step": 23001 }, { "epoch": 0.4204580766629499, "grad_norm": 7.230178250681007, "learning_rate": 6.507425468152684e-06, "loss": 17.8178, "step": 23002 }, { "epoch": 0.4204763558593964, "grad_norm": 7.004311385823891, "learning_rate": 6.5071432271265325e-06, "loss": 17.6068, "step": 23003 }, { "epoch": 0.42049463505584295, "grad_norm": 6.563694562402713, "learning_rate": 6.506860980817975e-06, "loss": 17.8427, "step": 23004 }, { "epoch": 0.4205129142522895, "grad_norm": 5.298064693816009, "learning_rate": 6.506578729228002e-06, "loss": 17.1916, "step": 23005 }, { "epoch": 0.420531193448736, "grad_norm": 5.726445908275986, "learning_rate": 6.5062964723575984e-06, "loss": 17.3356, "step": 23006 }, { "epoch": 0.4205494726451825, "grad_norm": 7.583442717564308, "learning_rate": 6.506014210207758e-06, "loss": 18.0529, "step": 23007 }, { "epoch": 0.42056775184162903, "grad_norm": 6.949887057451262, "learning_rate": 6.505731942779469e-06, "loss": 17.7024, "step": 23008 }, { "epoch": 0.42058603103807557, "grad_norm": 9.40189290605422, "learning_rate": 6.505449670073719e-06, "loss": 18.596, "step": 23009 }, { "epoch": 0.4206043102345221, "grad_norm": 6.785372381138408, "learning_rate": 6.505167392091499e-06, "loss": 17.7004, "step": 23010 }, { "epoch": 0.42062258943096864, "grad_norm": 5.254013375883744, "learning_rate": 6.5048851088338e-06, "loss": 17.0435, "step": 23011 }, { "epoch": 0.4206408686274151, "grad_norm": 6.722683697860286, "learning_rate": 6.5046028203016056e-06, "loss": 17.5095, "step": 23012 }, { "epoch": 0.42065914782386166, "grad_norm": 11.684252960700595, "learning_rate": 6.50432052649591e-06, "loss": 19.2351, "step": 23013 }, { "epoch": 0.4206774270203082, "grad_norm": 5.700003385395421, "learning_rate": 6.504038227417703e-06, "loss": 17.2616, "step": 23014 }, { "epoch": 0.42069570621675473, "grad_norm": 6.3408082485463115, "learning_rate": 6.503755923067972e-06, "loss": 17.5155, "step": 23015 }, { "epoch": 0.4207139854132012, "grad_norm": 6.399662701364488, "learning_rate": 6.5034736134477064e-06, "loss": 17.48, "step": 23016 }, { "epoch": 0.42073226460964774, "grad_norm": 7.063894437669993, "learning_rate": 6.503191298557895e-06, "loss": 17.5484, "step": 23017 }, { "epoch": 0.4207505438060943, "grad_norm": 5.456594985238224, "learning_rate": 6.502908978399531e-06, "loss": 17.2491, "step": 23018 }, { "epoch": 0.4207688230025408, "grad_norm": 6.251009222255237, "learning_rate": 6.502626652973601e-06, "loss": 17.478, "step": 23019 }, { "epoch": 0.42078710219898735, "grad_norm": 5.105701868192694, "learning_rate": 6.502344322281093e-06, "loss": 16.9507, "step": 23020 }, { "epoch": 0.42080538139543383, "grad_norm": 5.973386292605992, "learning_rate": 6.502061986323001e-06, "loss": 17.4321, "step": 23021 }, { "epoch": 0.42082366059188037, "grad_norm": 6.355501402620701, "learning_rate": 6.50177964510031e-06, "loss": 17.5487, "step": 23022 }, { "epoch": 0.4208419397883269, "grad_norm": 6.529065987468653, "learning_rate": 6.501497298614012e-06, "loss": 17.4421, "step": 23023 }, { "epoch": 0.42086021898477344, "grad_norm": 4.734227154551184, "learning_rate": 6.501214946865099e-06, "loss": 16.8256, "step": 23024 }, { "epoch": 0.42087849818122, "grad_norm": 6.475746937751447, "learning_rate": 6.500932589854554e-06, "loss": 17.6178, "step": 23025 }, { "epoch": 0.42089677737766645, "grad_norm": 7.898971635939524, "learning_rate": 6.500650227583373e-06, "loss": 17.9597, "step": 23026 }, { "epoch": 0.420915056574113, "grad_norm": 5.868163200725381, "learning_rate": 6.500367860052542e-06, "loss": 17.55, "step": 23027 }, { "epoch": 0.4209333357705595, "grad_norm": 6.305679871062329, "learning_rate": 6.500085487263054e-06, "loss": 17.4224, "step": 23028 }, { "epoch": 0.42095161496700606, "grad_norm": 7.460606064130908, "learning_rate": 6.4998031092158945e-06, "loss": 17.7353, "step": 23029 }, { "epoch": 0.4209698941634526, "grad_norm": 7.432347668733622, "learning_rate": 6.4995207259120545e-06, "loss": 18.1434, "step": 23030 }, { "epoch": 0.4209881733598991, "grad_norm": 7.0641466773516255, "learning_rate": 6.499238337352526e-06, "loss": 17.4237, "step": 23031 }, { "epoch": 0.4210064525563456, "grad_norm": 5.934801043033059, "learning_rate": 6.498955943538296e-06, "loss": 17.3854, "step": 23032 }, { "epoch": 0.42102473175279215, "grad_norm": 5.9483563809520374, "learning_rate": 6.498673544470357e-06, "loss": 17.0176, "step": 23033 }, { "epoch": 0.4210430109492387, "grad_norm": 6.40693209554785, "learning_rate": 6.498391140149697e-06, "loss": 17.5485, "step": 23034 }, { "epoch": 0.4210612901456852, "grad_norm": 6.269989729490269, "learning_rate": 6.498108730577305e-06, "loss": 17.344, "step": 23035 }, { "epoch": 0.4210795693421317, "grad_norm": 5.74404053994302, "learning_rate": 6.4978263157541724e-06, "loss": 17.3322, "step": 23036 }, { "epoch": 0.42109784853857823, "grad_norm": 6.126397048386426, "learning_rate": 6.497543895681289e-06, "loss": 17.4026, "step": 23037 }, { "epoch": 0.42111612773502477, "grad_norm": 7.4906858377145245, "learning_rate": 6.497261470359645e-06, "loss": 17.9141, "step": 23038 }, { "epoch": 0.4211344069314713, "grad_norm": 7.246093139301408, "learning_rate": 6.496979039790228e-06, "loss": 17.6428, "step": 23039 }, { "epoch": 0.42115268612791784, "grad_norm": 5.9149978582310725, "learning_rate": 6.496696603974029e-06, "loss": 17.2904, "step": 23040 }, { "epoch": 0.4211709653243643, "grad_norm": 5.73176407449862, "learning_rate": 6.496414162912039e-06, "loss": 17.5765, "step": 23041 }, { "epoch": 0.42118924452081086, "grad_norm": 6.743509820661, "learning_rate": 6.496131716605247e-06, "loss": 17.6755, "step": 23042 }, { "epoch": 0.4212075237172574, "grad_norm": 21.24521701980779, "learning_rate": 6.495849265054645e-06, "loss": 17.8667, "step": 23043 }, { "epoch": 0.42122580291370393, "grad_norm": 7.545617373943087, "learning_rate": 6.495566808261218e-06, "loss": 17.9976, "step": 23044 }, { "epoch": 0.42124408211015046, "grad_norm": 7.0451922621308105, "learning_rate": 6.4952843462259605e-06, "loss": 17.7555, "step": 23045 }, { "epoch": 0.42126236130659694, "grad_norm": 6.393711946691229, "learning_rate": 6.495001878949862e-06, "loss": 17.5035, "step": 23046 }, { "epoch": 0.4212806405030435, "grad_norm": 7.00282923224376, "learning_rate": 6.4947194064339106e-06, "loss": 17.6665, "step": 23047 }, { "epoch": 0.42129891969949, "grad_norm": 5.760467812965505, "learning_rate": 6.494436928679098e-06, "loss": 17.2732, "step": 23048 }, { "epoch": 0.42131719889593655, "grad_norm": 7.456025998569738, "learning_rate": 6.494154445686413e-06, "loss": 17.6858, "step": 23049 }, { "epoch": 0.42133547809238303, "grad_norm": 5.678786638497476, "learning_rate": 6.493871957456847e-06, "loss": 17.263, "step": 23050 }, { "epoch": 0.42135375728882957, "grad_norm": 6.27811710201321, "learning_rate": 6.493589463991389e-06, "loss": 17.6338, "step": 23051 }, { "epoch": 0.4213720364852761, "grad_norm": 6.141284749832784, "learning_rate": 6.4933069652910286e-06, "loss": 17.4214, "step": 23052 }, { "epoch": 0.42139031568172264, "grad_norm": 6.329698450530063, "learning_rate": 6.4930244613567585e-06, "loss": 17.258, "step": 23053 }, { "epoch": 0.4214085948781692, "grad_norm": 5.062037776340806, "learning_rate": 6.492741952189566e-06, "loss": 16.6702, "step": 23054 }, { "epoch": 0.42142687407461565, "grad_norm": 6.427858729614545, "learning_rate": 6.492459437790444e-06, "loss": 17.6709, "step": 23055 }, { "epoch": 0.4214451532710622, "grad_norm": 8.082825206280688, "learning_rate": 6.49217691816038e-06, "loss": 17.9099, "step": 23056 }, { "epoch": 0.4214634324675087, "grad_norm": 6.148529649176768, "learning_rate": 6.4918943933003654e-06, "loss": 17.223, "step": 23057 }, { "epoch": 0.42148171166395526, "grad_norm": 6.107731647693613, "learning_rate": 6.49161186321139e-06, "loss": 17.5471, "step": 23058 }, { "epoch": 0.4214999908604018, "grad_norm": 6.598753730395351, "learning_rate": 6.491329327894447e-06, "loss": 17.6031, "step": 23059 }, { "epoch": 0.4215182700568483, "grad_norm": 6.269006757437165, "learning_rate": 6.4910467873505215e-06, "loss": 17.3866, "step": 23060 }, { "epoch": 0.4215365492532948, "grad_norm": 8.63377623937528, "learning_rate": 6.490764241580607e-06, "loss": 18.8411, "step": 23061 }, { "epoch": 0.42155482844974135, "grad_norm": 6.131700349978271, "learning_rate": 6.490481690585694e-06, "loss": 17.1781, "step": 23062 }, { "epoch": 0.4215731076461879, "grad_norm": 5.9149467235687485, "learning_rate": 6.49019913436677e-06, "loss": 17.239, "step": 23063 }, { "epoch": 0.4215913868426344, "grad_norm": 7.234875253028634, "learning_rate": 6.489916572924829e-06, "loss": 17.8849, "step": 23064 }, { "epoch": 0.4216096660390809, "grad_norm": 6.330188920281069, "learning_rate": 6.4896340062608595e-06, "loss": 17.2995, "step": 23065 }, { "epoch": 0.42162794523552743, "grad_norm": 7.635884789761982, "learning_rate": 6.489351434375852e-06, "loss": 18.158, "step": 23066 }, { "epoch": 0.42164622443197397, "grad_norm": 7.506853994220852, "learning_rate": 6.4890688572707975e-06, "loss": 17.9646, "step": 23067 }, { "epoch": 0.4216645036284205, "grad_norm": 7.183464164255513, "learning_rate": 6.488786274946684e-06, "loss": 17.7851, "step": 23068 }, { "epoch": 0.42168278282486704, "grad_norm": 6.9744005775285, "learning_rate": 6.488503687404506e-06, "loss": 18.1321, "step": 23069 }, { "epoch": 0.4217010620213135, "grad_norm": 5.343039344496768, "learning_rate": 6.4882210946452515e-06, "loss": 17.066, "step": 23070 }, { "epoch": 0.42171934121776006, "grad_norm": 7.2410374871601615, "learning_rate": 6.48793849666991e-06, "loss": 17.9285, "step": 23071 }, { "epoch": 0.4217376204142066, "grad_norm": 6.736478462705318, "learning_rate": 6.487655893479473e-06, "loss": 17.6948, "step": 23072 }, { "epoch": 0.42175589961065313, "grad_norm": 8.07099306422754, "learning_rate": 6.487373285074933e-06, "loss": 17.7591, "step": 23073 }, { "epoch": 0.42177417880709966, "grad_norm": 8.15454074081976, "learning_rate": 6.487090671457278e-06, "loss": 17.8959, "step": 23074 }, { "epoch": 0.42179245800354614, "grad_norm": 5.813501670173463, "learning_rate": 6.4868080526274975e-06, "loss": 17.3495, "step": 23075 }, { "epoch": 0.4218107371999927, "grad_norm": 6.051453057483406, "learning_rate": 6.4865254285865855e-06, "loss": 17.4222, "step": 23076 }, { "epoch": 0.4218290163964392, "grad_norm": 5.7059688922023994, "learning_rate": 6.4862427993355315e-06, "loss": 17.3903, "step": 23077 }, { "epoch": 0.42184729559288575, "grad_norm": 7.380393933533788, "learning_rate": 6.485960164875323e-06, "loss": 17.5945, "step": 23078 }, { "epoch": 0.4218655747893323, "grad_norm": 5.822752000654732, "learning_rate": 6.485677525206955e-06, "loss": 17.3908, "step": 23079 }, { "epoch": 0.42188385398577877, "grad_norm": 6.866617406183222, "learning_rate": 6.485394880331416e-06, "loss": 17.6554, "step": 23080 }, { "epoch": 0.4219021331822253, "grad_norm": 6.685040144882246, "learning_rate": 6.485112230249696e-06, "loss": 17.6405, "step": 23081 }, { "epoch": 0.42192041237867184, "grad_norm": 5.7851308625382645, "learning_rate": 6.484829574962788e-06, "loss": 17.2046, "step": 23082 }, { "epoch": 0.4219386915751184, "grad_norm": 7.830516847175503, "learning_rate": 6.484546914471681e-06, "loss": 18.1744, "step": 23083 }, { "epoch": 0.42195697077156485, "grad_norm": 7.663114401718743, "learning_rate": 6.484264248777365e-06, "loss": 17.722, "step": 23084 }, { "epoch": 0.4219752499680114, "grad_norm": 6.8836285566600095, "learning_rate": 6.48398157788083e-06, "loss": 17.803, "step": 23085 }, { "epoch": 0.4219935291644579, "grad_norm": 6.639189576399866, "learning_rate": 6.4836989017830705e-06, "loss": 17.9358, "step": 23086 }, { "epoch": 0.42201180836090446, "grad_norm": 6.4209760802856835, "learning_rate": 6.483416220485076e-06, "loss": 17.5268, "step": 23087 }, { "epoch": 0.422030087557351, "grad_norm": 6.811218033578101, "learning_rate": 6.4831335339878355e-06, "loss": 18.1312, "step": 23088 }, { "epoch": 0.4220483667537975, "grad_norm": 8.252758684835596, "learning_rate": 6.4828508422923394e-06, "loss": 17.8009, "step": 23089 }, { "epoch": 0.422066645950244, "grad_norm": 5.580805128115323, "learning_rate": 6.4825681453995805e-06, "loss": 17.2021, "step": 23090 }, { "epoch": 0.42208492514669055, "grad_norm": 6.77878781847524, "learning_rate": 6.482285443310549e-06, "loss": 17.3954, "step": 23091 }, { "epoch": 0.4221032043431371, "grad_norm": 5.443725019425856, "learning_rate": 6.482002736026236e-06, "loss": 17.1061, "step": 23092 }, { "epoch": 0.4221214835395836, "grad_norm": 7.432860429111075, "learning_rate": 6.481720023547631e-06, "loss": 17.858, "step": 23093 }, { "epoch": 0.4221397627360301, "grad_norm": 7.115159419646255, "learning_rate": 6.481437305875727e-06, "loss": 17.8522, "step": 23094 }, { "epoch": 0.42215804193247664, "grad_norm": 6.601983738113477, "learning_rate": 6.481154583011513e-06, "loss": 17.8641, "step": 23095 }, { "epoch": 0.42217632112892317, "grad_norm": 6.441212479276569, "learning_rate": 6.4808718549559815e-06, "loss": 17.6611, "step": 23096 }, { "epoch": 0.4221946003253697, "grad_norm": 5.8398323228293, "learning_rate": 6.480589121710123e-06, "loss": 17.3993, "step": 23097 }, { "epoch": 0.42221287952181624, "grad_norm": 5.15225786937002, "learning_rate": 6.480306383274926e-06, "loss": 17.003, "step": 23098 }, { "epoch": 0.4222311587182627, "grad_norm": 6.037309898579251, "learning_rate": 6.480023639651385e-06, "loss": 17.1447, "step": 23099 }, { "epoch": 0.42224943791470926, "grad_norm": 7.347191776281626, "learning_rate": 6.47974089084049e-06, "loss": 18.1314, "step": 23100 }, { "epoch": 0.4222677171111558, "grad_norm": 7.158394307901473, "learning_rate": 6.479458136843232e-06, "loss": 17.8036, "step": 23101 }, { "epoch": 0.42228599630760233, "grad_norm": 6.573547552557479, "learning_rate": 6.479175377660601e-06, "loss": 17.4141, "step": 23102 }, { "epoch": 0.42230427550404886, "grad_norm": 6.198939740718497, "learning_rate": 6.478892613293586e-06, "loss": 17.4501, "step": 23103 }, { "epoch": 0.42232255470049535, "grad_norm": 6.512008325165434, "learning_rate": 6.4786098437431845e-06, "loss": 17.7391, "step": 23104 }, { "epoch": 0.4223408338969419, "grad_norm": 7.510655871600087, "learning_rate": 6.478327069010381e-06, "loss": 17.8093, "step": 23105 }, { "epoch": 0.4223591130933884, "grad_norm": 6.438621615415397, "learning_rate": 6.478044289096173e-06, "loss": 17.5805, "step": 23106 }, { "epoch": 0.42237739228983495, "grad_norm": 7.336705497786379, "learning_rate": 6.477761504001545e-06, "loss": 17.7835, "step": 23107 }, { "epoch": 0.4223956714862815, "grad_norm": 5.87458767097228, "learning_rate": 6.477478713727492e-06, "loss": 17.4316, "step": 23108 }, { "epoch": 0.42241395068272797, "grad_norm": 6.552670239449119, "learning_rate": 6.477195918275003e-06, "loss": 17.901, "step": 23109 }, { "epoch": 0.4224322298791745, "grad_norm": 6.2558059341346475, "learning_rate": 6.476913117645073e-06, "loss": 17.6013, "step": 23110 }, { "epoch": 0.42245050907562104, "grad_norm": 6.159191106187636, "learning_rate": 6.47663031183869e-06, "loss": 17.4801, "step": 23111 }, { "epoch": 0.4224687882720676, "grad_norm": 6.3639036507049935, "learning_rate": 6.476347500856844e-06, "loss": 17.7147, "step": 23112 }, { "epoch": 0.4224870674685141, "grad_norm": 5.495717279870066, "learning_rate": 6.476064684700529e-06, "loss": 17.1045, "step": 23113 }, { "epoch": 0.4225053466649606, "grad_norm": 5.334120959304089, "learning_rate": 6.475781863370738e-06, "loss": 17.3345, "step": 23114 }, { "epoch": 0.4225236258614071, "grad_norm": 7.442688966357905, "learning_rate": 6.4754990368684565e-06, "loss": 17.7983, "step": 23115 }, { "epoch": 0.42254190505785366, "grad_norm": 7.859035112315611, "learning_rate": 6.475216205194681e-06, "loss": 17.8951, "step": 23116 }, { "epoch": 0.4225601842543002, "grad_norm": 6.195672053441522, "learning_rate": 6.474933368350398e-06, "loss": 17.2802, "step": 23117 }, { "epoch": 0.4225784634507467, "grad_norm": 6.771734234262354, "learning_rate": 6.4746505263366045e-06, "loss": 17.7595, "step": 23118 }, { "epoch": 0.4225967426471932, "grad_norm": 5.870869540255059, "learning_rate": 6.4743676791542874e-06, "loss": 17.2879, "step": 23119 }, { "epoch": 0.42261502184363975, "grad_norm": 8.189513975913615, "learning_rate": 6.474084826804438e-06, "loss": 17.9759, "step": 23120 }, { "epoch": 0.4226333010400863, "grad_norm": 6.303489517903496, "learning_rate": 6.473801969288052e-06, "loss": 17.3265, "step": 23121 }, { "epoch": 0.4226515802365328, "grad_norm": 7.0375939108654695, "learning_rate": 6.473519106606117e-06, "loss": 17.8507, "step": 23122 }, { "epoch": 0.4226698594329793, "grad_norm": 6.59593245315403, "learning_rate": 6.473236238759625e-06, "loss": 17.6573, "step": 23123 }, { "epoch": 0.42268813862942584, "grad_norm": 6.561789534776478, "learning_rate": 6.472953365749569e-06, "loss": 17.5769, "step": 23124 }, { "epoch": 0.42270641782587237, "grad_norm": 6.425788824945835, "learning_rate": 6.472670487576937e-06, "loss": 17.762, "step": 23125 }, { "epoch": 0.4227246970223189, "grad_norm": 6.233744604495483, "learning_rate": 6.4723876042427245e-06, "loss": 17.5841, "step": 23126 }, { "epoch": 0.42274297621876544, "grad_norm": 6.3898368059332755, "learning_rate": 6.47210471574792e-06, "loss": 17.5965, "step": 23127 }, { "epoch": 0.4227612554152119, "grad_norm": 5.853990276212833, "learning_rate": 6.471821822093518e-06, "loss": 17.4561, "step": 23128 }, { "epoch": 0.42277953461165846, "grad_norm": 6.057939282826052, "learning_rate": 6.471538923280507e-06, "loss": 17.346, "step": 23129 }, { "epoch": 0.422797813808105, "grad_norm": 6.8457531698245715, "learning_rate": 6.471256019309879e-06, "loss": 17.6576, "step": 23130 }, { "epoch": 0.42281609300455153, "grad_norm": 6.489701519716401, "learning_rate": 6.470973110182629e-06, "loss": 17.5682, "step": 23131 }, { "epoch": 0.42283437220099807, "grad_norm": 6.988440599621172, "learning_rate": 6.470690195899744e-06, "loss": 17.9877, "step": 23132 }, { "epoch": 0.42285265139744455, "grad_norm": 6.231174844169285, "learning_rate": 6.470407276462217e-06, "loss": 17.6179, "step": 23133 }, { "epoch": 0.4228709305938911, "grad_norm": 8.562516756371206, "learning_rate": 6.470124351871041e-06, "loss": 17.6676, "step": 23134 }, { "epoch": 0.4228892097903376, "grad_norm": 6.440482517126097, "learning_rate": 6.4698414221272066e-06, "loss": 17.6784, "step": 23135 }, { "epoch": 0.42290748898678415, "grad_norm": 6.348356776678403, "learning_rate": 6.469558487231706e-06, "loss": 17.545, "step": 23136 }, { "epoch": 0.4229257681832307, "grad_norm": 12.258602007158364, "learning_rate": 6.469275547185529e-06, "loss": 18.8457, "step": 23137 }, { "epoch": 0.42294404737967717, "grad_norm": 7.0072444440524455, "learning_rate": 6.468992601989671e-06, "loss": 17.976, "step": 23138 }, { "epoch": 0.4229623265761237, "grad_norm": 6.128780314200569, "learning_rate": 6.468709651645119e-06, "loss": 17.4697, "step": 23139 }, { "epoch": 0.42298060577257024, "grad_norm": 6.223880414362575, "learning_rate": 6.4684266961528675e-06, "loss": 17.4181, "step": 23140 }, { "epoch": 0.4229988849690168, "grad_norm": 5.158827222271919, "learning_rate": 6.468143735513908e-06, "loss": 16.8935, "step": 23141 }, { "epoch": 0.4230171641654633, "grad_norm": 6.220023788164625, "learning_rate": 6.467860769729234e-06, "loss": 17.5723, "step": 23142 }, { "epoch": 0.4230354433619098, "grad_norm": 6.199615221312094, "learning_rate": 6.467577798799834e-06, "loss": 17.3186, "step": 23143 }, { "epoch": 0.4230537225583563, "grad_norm": 5.38133794882142, "learning_rate": 6.4672948227267e-06, "loss": 17.0014, "step": 23144 }, { "epoch": 0.42307200175480286, "grad_norm": 6.8549333842776585, "learning_rate": 6.467011841510827e-06, "loss": 18.0086, "step": 23145 }, { "epoch": 0.4230902809512494, "grad_norm": 5.703526497242912, "learning_rate": 6.466728855153203e-06, "loss": 17.295, "step": 23146 }, { "epoch": 0.42310856014769593, "grad_norm": 7.304360598495538, "learning_rate": 6.466445863654823e-06, "loss": 17.7328, "step": 23147 }, { "epoch": 0.4231268393441424, "grad_norm": 7.816409379870154, "learning_rate": 6.466162867016677e-06, "loss": 17.6701, "step": 23148 }, { "epoch": 0.42314511854058895, "grad_norm": 6.000202311919672, "learning_rate": 6.465879865239757e-06, "loss": 17.4872, "step": 23149 }, { "epoch": 0.4231633977370355, "grad_norm": 7.076186929357036, "learning_rate": 6.465596858325056e-06, "loss": 17.9496, "step": 23150 }, { "epoch": 0.423181676933482, "grad_norm": 6.291260848980936, "learning_rate": 6.465313846273566e-06, "loss": 17.1886, "step": 23151 }, { "epoch": 0.4231999561299285, "grad_norm": 4.861121498229915, "learning_rate": 6.465030829086276e-06, "loss": 16.8271, "step": 23152 }, { "epoch": 0.42321823532637504, "grad_norm": 7.000038974411, "learning_rate": 6.464747806764181e-06, "loss": 17.7604, "step": 23153 }, { "epoch": 0.42323651452282157, "grad_norm": 6.553291930245339, "learning_rate": 6.4644647793082725e-06, "loss": 17.5454, "step": 23154 }, { "epoch": 0.4232547937192681, "grad_norm": 6.337761010366317, "learning_rate": 6.464181746719541e-06, "loss": 17.578, "step": 23155 }, { "epoch": 0.42327307291571464, "grad_norm": 6.479008818263093, "learning_rate": 6.463898708998981e-06, "loss": 17.6494, "step": 23156 }, { "epoch": 0.4232913521121611, "grad_norm": 6.435850887126168, "learning_rate": 6.463615666147581e-06, "loss": 17.4268, "step": 23157 }, { "epoch": 0.42330963130860766, "grad_norm": 7.474651685203027, "learning_rate": 6.463332618166337e-06, "loss": 17.697, "step": 23158 }, { "epoch": 0.4233279105050542, "grad_norm": 7.888308945496125, "learning_rate": 6.463049565056239e-06, "loss": 18.1867, "step": 23159 }, { "epoch": 0.42334618970150073, "grad_norm": 5.471285991459773, "learning_rate": 6.462766506818279e-06, "loss": 17.1011, "step": 23160 }, { "epoch": 0.42336446889794727, "grad_norm": 6.5632578482274875, "learning_rate": 6.462483443453449e-06, "loss": 17.5202, "step": 23161 }, { "epoch": 0.42338274809439375, "grad_norm": 6.3339588163787255, "learning_rate": 6.4622003749627415e-06, "loss": 17.4519, "step": 23162 }, { "epoch": 0.4234010272908403, "grad_norm": 7.567082923147876, "learning_rate": 6.461917301347148e-06, "loss": 17.8977, "step": 23163 }, { "epoch": 0.4234193064872868, "grad_norm": 7.367710063713969, "learning_rate": 6.461634222607662e-06, "loss": 18.1403, "step": 23164 }, { "epoch": 0.42343758568373335, "grad_norm": 8.318156710560396, "learning_rate": 6.461351138745275e-06, "loss": 18.1321, "step": 23165 }, { "epoch": 0.4234558648801799, "grad_norm": 7.474583299570602, "learning_rate": 6.461068049760978e-06, "loss": 18.057, "step": 23166 }, { "epoch": 0.42347414407662637, "grad_norm": 6.9254902409955275, "learning_rate": 6.460784955655766e-06, "loss": 17.8518, "step": 23167 }, { "epoch": 0.4234924232730729, "grad_norm": 7.831107012584451, "learning_rate": 6.4605018564306275e-06, "loss": 18.1655, "step": 23168 }, { "epoch": 0.42351070246951944, "grad_norm": 5.2637812020058234, "learning_rate": 6.460218752086559e-06, "loss": 17.1073, "step": 23169 }, { "epoch": 0.423528981665966, "grad_norm": 6.165313486475107, "learning_rate": 6.459935642624549e-06, "loss": 17.4921, "step": 23170 }, { "epoch": 0.4235472608624125, "grad_norm": 5.610198829730439, "learning_rate": 6.45965252804559e-06, "loss": 17.2784, "step": 23171 }, { "epoch": 0.423565540058859, "grad_norm": 6.1106798370772415, "learning_rate": 6.459369408350677e-06, "loss": 17.2397, "step": 23172 }, { "epoch": 0.4235838192553055, "grad_norm": 7.268801561247627, "learning_rate": 6.459086283540802e-06, "loss": 18.0769, "step": 23173 }, { "epoch": 0.42360209845175206, "grad_norm": 5.950001126618713, "learning_rate": 6.458803153616955e-06, "loss": 17.5477, "step": 23174 }, { "epoch": 0.4236203776481986, "grad_norm": 5.950580894332745, "learning_rate": 6.45852001858013e-06, "loss": 17.0636, "step": 23175 }, { "epoch": 0.42363865684464513, "grad_norm": 6.151153173015378, "learning_rate": 6.458236878431317e-06, "loss": 17.3583, "step": 23176 }, { "epoch": 0.4236569360410916, "grad_norm": 6.353086558370284, "learning_rate": 6.457953733171513e-06, "loss": 17.2193, "step": 23177 }, { "epoch": 0.42367521523753815, "grad_norm": 6.90804130787632, "learning_rate": 6.457670582801706e-06, "loss": 17.4209, "step": 23178 }, { "epoch": 0.4236934944339847, "grad_norm": 6.916016700328348, "learning_rate": 6.457387427322889e-06, "loss": 17.4882, "step": 23179 }, { "epoch": 0.4237117736304312, "grad_norm": 6.390139397516349, "learning_rate": 6.4571042667360585e-06, "loss": 17.3082, "step": 23180 }, { "epoch": 0.42373005282687776, "grad_norm": 7.211340731285922, "learning_rate": 6.4568211010422025e-06, "loss": 17.9054, "step": 23181 }, { "epoch": 0.42374833202332424, "grad_norm": 7.222845905313327, "learning_rate": 6.456537930242315e-06, "loss": 17.7328, "step": 23182 }, { "epoch": 0.4237666112197708, "grad_norm": 6.221297555481, "learning_rate": 6.45625475433739e-06, "loss": 17.6305, "step": 23183 }, { "epoch": 0.4237848904162173, "grad_norm": 6.447643288950181, "learning_rate": 6.455971573328415e-06, "loss": 17.6717, "step": 23184 }, { "epoch": 0.42380316961266384, "grad_norm": 8.04998433861434, "learning_rate": 6.4556883872163875e-06, "loss": 18.1202, "step": 23185 }, { "epoch": 0.4238214488091103, "grad_norm": 7.751375134420809, "learning_rate": 6.4554051960023e-06, "loss": 18.5387, "step": 23186 }, { "epoch": 0.42383972800555686, "grad_norm": 7.467149403090195, "learning_rate": 6.455121999687143e-06, "loss": 17.7701, "step": 23187 }, { "epoch": 0.4238580072020034, "grad_norm": 7.1647614363195515, "learning_rate": 6.454838798271909e-06, "loss": 17.3247, "step": 23188 }, { "epoch": 0.42387628639844993, "grad_norm": 8.03890099407871, "learning_rate": 6.45455559175759e-06, "loss": 18.1342, "step": 23189 }, { "epoch": 0.42389456559489647, "grad_norm": 7.077863520526551, "learning_rate": 6.454272380145183e-06, "loss": 17.4789, "step": 23190 }, { "epoch": 0.42391284479134295, "grad_norm": 7.8605563705078465, "learning_rate": 6.453989163435676e-06, "loss": 18.283, "step": 23191 }, { "epoch": 0.4239311239877895, "grad_norm": 7.136978425686276, "learning_rate": 6.453705941630062e-06, "loss": 17.8003, "step": 23192 }, { "epoch": 0.423949403184236, "grad_norm": 5.420422828767463, "learning_rate": 6.453422714729336e-06, "loss": 17.055, "step": 23193 }, { "epoch": 0.42396768238068255, "grad_norm": 8.036881457389141, "learning_rate": 6.453139482734489e-06, "loss": 17.8265, "step": 23194 }, { "epoch": 0.4239859615771291, "grad_norm": 6.376078633630916, "learning_rate": 6.452856245646515e-06, "loss": 17.5076, "step": 23195 }, { "epoch": 0.42400424077357557, "grad_norm": 7.055613800982587, "learning_rate": 6.4525730034664046e-06, "loss": 17.6407, "step": 23196 }, { "epoch": 0.4240225199700221, "grad_norm": 5.7198785669912136, "learning_rate": 6.4522897561951536e-06, "loss": 17.2559, "step": 23197 }, { "epoch": 0.42404079916646864, "grad_norm": 7.325935359629535, "learning_rate": 6.452006503833752e-06, "loss": 17.8513, "step": 23198 }, { "epoch": 0.4240590783629152, "grad_norm": 6.532728512763269, "learning_rate": 6.451723246383194e-06, "loss": 17.2482, "step": 23199 }, { "epoch": 0.4240773575593617, "grad_norm": 6.1520616411847495, "learning_rate": 6.451439983844472e-06, "loss": 17.5248, "step": 23200 }, { "epoch": 0.4240956367558082, "grad_norm": 5.030040310422997, "learning_rate": 6.451156716218579e-06, "loss": 16.9401, "step": 23201 }, { "epoch": 0.4241139159522547, "grad_norm": 6.108694950567766, "learning_rate": 6.450873443506507e-06, "loss": 17.6505, "step": 23202 }, { "epoch": 0.42413219514870126, "grad_norm": 6.204057434994778, "learning_rate": 6.450590165709248e-06, "loss": 17.585, "step": 23203 }, { "epoch": 0.4241504743451478, "grad_norm": 9.611802540482145, "learning_rate": 6.4503068828277994e-06, "loss": 17.6319, "step": 23204 }, { "epoch": 0.42416875354159433, "grad_norm": 5.975477673986726, "learning_rate": 6.450023594863149e-06, "loss": 17.2863, "step": 23205 }, { "epoch": 0.4241870327380408, "grad_norm": 6.75798405226483, "learning_rate": 6.449740301816292e-06, "loss": 17.4903, "step": 23206 }, { "epoch": 0.42420531193448735, "grad_norm": 6.041848193441785, "learning_rate": 6.449457003688222e-06, "loss": 17.4501, "step": 23207 }, { "epoch": 0.4242235911309339, "grad_norm": 5.974743847344579, "learning_rate": 6.4491737004799305e-06, "loss": 17.2646, "step": 23208 }, { "epoch": 0.4242418703273804, "grad_norm": 6.457713787705749, "learning_rate": 6.44889039219241e-06, "loss": 17.4153, "step": 23209 }, { "epoch": 0.42426014952382696, "grad_norm": 5.316422577175712, "learning_rate": 6.448607078826655e-06, "loss": 17.2499, "step": 23210 }, { "epoch": 0.42427842872027344, "grad_norm": 6.224997073142436, "learning_rate": 6.448323760383659e-06, "loss": 17.7254, "step": 23211 }, { "epoch": 0.42429670791672, "grad_norm": 6.81697345358101, "learning_rate": 6.448040436864412e-06, "loss": 17.4188, "step": 23212 }, { "epoch": 0.4243149871131665, "grad_norm": 7.262940284252221, "learning_rate": 6.447757108269911e-06, "loss": 17.9658, "step": 23213 }, { "epoch": 0.42433326630961304, "grad_norm": 6.028857721532317, "learning_rate": 6.4474737746011465e-06, "loss": 17.2204, "step": 23214 }, { "epoch": 0.4243515455060596, "grad_norm": 5.812748220060766, "learning_rate": 6.447190435859111e-06, "loss": 17.2248, "step": 23215 }, { "epoch": 0.42436982470250606, "grad_norm": 7.9293828470095615, "learning_rate": 6.446907092044799e-06, "loss": 17.979, "step": 23216 }, { "epoch": 0.4243881038989526, "grad_norm": 6.89259415779758, "learning_rate": 6.446623743159203e-06, "loss": 17.6296, "step": 23217 }, { "epoch": 0.42440638309539913, "grad_norm": 7.8202367995967, "learning_rate": 6.446340389203317e-06, "loss": 18.0758, "step": 23218 }, { "epoch": 0.42442466229184567, "grad_norm": 5.132927930426186, "learning_rate": 6.446057030178132e-06, "loss": 17.0155, "step": 23219 }, { "epoch": 0.42444294148829215, "grad_norm": 4.992649609231906, "learning_rate": 6.445773666084645e-06, "loss": 16.9942, "step": 23220 }, { "epoch": 0.4244612206847387, "grad_norm": 7.880871310450647, "learning_rate": 6.445490296923844e-06, "loss": 17.6014, "step": 23221 }, { "epoch": 0.4244794998811852, "grad_norm": 6.238049813757726, "learning_rate": 6.445206922696727e-06, "loss": 17.4601, "step": 23222 }, { "epoch": 0.42449777907763175, "grad_norm": 6.126625090422114, "learning_rate": 6.444923543404285e-06, "loss": 17.3324, "step": 23223 }, { "epoch": 0.4245160582740783, "grad_norm": 6.958085470379649, "learning_rate": 6.444640159047511e-06, "loss": 17.7785, "step": 23224 }, { "epoch": 0.42453433747052477, "grad_norm": 6.814057729507913, "learning_rate": 6.444356769627398e-06, "loss": 17.8646, "step": 23225 }, { "epoch": 0.4245526166669713, "grad_norm": 5.994456734825881, "learning_rate": 6.4440733751449396e-06, "loss": 17.3985, "step": 23226 }, { "epoch": 0.42457089586341784, "grad_norm": 8.369769332719311, "learning_rate": 6.443789975601129e-06, "loss": 17.7328, "step": 23227 }, { "epoch": 0.4245891750598644, "grad_norm": 8.509696638553095, "learning_rate": 6.443506570996962e-06, "loss": 18.0756, "step": 23228 }, { "epoch": 0.4246074542563109, "grad_norm": 5.68080480769699, "learning_rate": 6.4432231613334295e-06, "loss": 17.3222, "step": 23229 }, { "epoch": 0.4246257334527574, "grad_norm": 5.864578603069478, "learning_rate": 6.442939746611523e-06, "loss": 17.4366, "step": 23230 }, { "epoch": 0.42464401264920393, "grad_norm": 6.695005158913941, "learning_rate": 6.44265632683224e-06, "loss": 17.4122, "step": 23231 }, { "epoch": 0.42466229184565046, "grad_norm": 7.886751378773805, "learning_rate": 6.44237290199657e-06, "loss": 18.1189, "step": 23232 }, { "epoch": 0.424680571042097, "grad_norm": 5.667682538075096, "learning_rate": 6.4420894721055094e-06, "loss": 17.2764, "step": 23233 }, { "epoch": 0.42469885023854353, "grad_norm": 6.566721020308051, "learning_rate": 6.44180603716005e-06, "loss": 17.4361, "step": 23234 }, { "epoch": 0.42471712943499, "grad_norm": 6.160303505709899, "learning_rate": 6.441522597161185e-06, "loss": 17.2469, "step": 23235 }, { "epoch": 0.42473540863143655, "grad_norm": 6.889576470886445, "learning_rate": 6.4412391521099084e-06, "loss": 17.6904, "step": 23236 }, { "epoch": 0.4247536878278831, "grad_norm": 5.488787147695676, "learning_rate": 6.4409557020072145e-06, "loss": 16.8651, "step": 23237 }, { "epoch": 0.4247719670243296, "grad_norm": 8.267265947832813, "learning_rate": 6.440672246854096e-06, "loss": 18.3391, "step": 23238 }, { "epoch": 0.42479024622077616, "grad_norm": 6.037747717693437, "learning_rate": 6.4403887866515445e-06, "loss": 17.3174, "step": 23239 }, { "epoch": 0.42480852541722264, "grad_norm": 6.060987718727519, "learning_rate": 6.440105321400556e-06, "loss": 17.6085, "step": 23240 }, { "epoch": 0.4248268046136692, "grad_norm": 5.91925489394884, "learning_rate": 6.439821851102124e-06, "loss": 17.2479, "step": 23241 }, { "epoch": 0.4248450838101157, "grad_norm": 5.738825810714, "learning_rate": 6.439538375757243e-06, "loss": 17.2108, "step": 23242 }, { "epoch": 0.42486336300656224, "grad_norm": 7.070132764479361, "learning_rate": 6.439254895366902e-06, "loss": 17.7795, "step": 23243 }, { "epoch": 0.4248816422030088, "grad_norm": 5.287897605428229, "learning_rate": 6.4389714099320975e-06, "loss": 17.1248, "step": 23244 }, { "epoch": 0.42489992139945526, "grad_norm": 7.782209619870981, "learning_rate": 6.438687919453826e-06, "loss": 18.2702, "step": 23245 }, { "epoch": 0.4249182005959018, "grad_norm": 6.820480185191707, "learning_rate": 6.438404423933076e-06, "loss": 17.3446, "step": 23246 }, { "epoch": 0.42493647979234833, "grad_norm": 5.73377650808498, "learning_rate": 6.438120923370843e-06, "loss": 17.2321, "step": 23247 }, { "epoch": 0.42495475898879487, "grad_norm": 7.453244852160705, "learning_rate": 6.437837417768123e-06, "loss": 18.1756, "step": 23248 }, { "epoch": 0.4249730381852414, "grad_norm": 5.9834094339976, "learning_rate": 6.437553907125905e-06, "loss": 17.3006, "step": 23249 }, { "epoch": 0.4249913173816879, "grad_norm": 5.715656527767392, "learning_rate": 6.437270391445186e-06, "loss": 17.3641, "step": 23250 }, { "epoch": 0.4250095965781344, "grad_norm": 5.991435802699906, "learning_rate": 6.43698687072696e-06, "loss": 17.5296, "step": 23251 }, { "epoch": 0.42502787577458095, "grad_norm": 5.914436881039171, "learning_rate": 6.436703344972219e-06, "loss": 17.3627, "step": 23252 }, { "epoch": 0.4250461549710275, "grad_norm": 7.27600022634788, "learning_rate": 6.436419814181958e-06, "loss": 17.9451, "step": 23253 }, { "epoch": 0.42506443416747397, "grad_norm": 6.39585124336612, "learning_rate": 6.43613627835717e-06, "loss": 17.7013, "step": 23254 }, { "epoch": 0.4250827133639205, "grad_norm": 7.95961009723974, "learning_rate": 6.435852737498849e-06, "loss": 17.8516, "step": 23255 }, { "epoch": 0.42510099256036704, "grad_norm": 7.026971734875077, "learning_rate": 6.435569191607989e-06, "loss": 17.6718, "step": 23256 }, { "epoch": 0.4251192717568136, "grad_norm": 6.698563438269797, "learning_rate": 6.435285640685582e-06, "loss": 17.7578, "step": 23257 }, { "epoch": 0.4251375509532601, "grad_norm": 6.703836671505481, "learning_rate": 6.435002084732625e-06, "loss": 17.5404, "step": 23258 }, { "epoch": 0.4251558301497066, "grad_norm": 5.346873724366964, "learning_rate": 6.4347185237501095e-06, "loss": 17.004, "step": 23259 }, { "epoch": 0.42517410934615313, "grad_norm": 5.8769726319864874, "learning_rate": 6.4344349577390306e-06, "loss": 16.9058, "step": 23260 }, { "epoch": 0.42519238854259966, "grad_norm": 4.379997502804807, "learning_rate": 6.434151386700382e-06, "loss": 16.8215, "step": 23261 }, { "epoch": 0.4252106677390462, "grad_norm": 6.778903453990103, "learning_rate": 6.433867810635156e-06, "loss": 17.7149, "step": 23262 }, { "epoch": 0.42522894693549274, "grad_norm": 8.234971800394172, "learning_rate": 6.433584229544348e-06, "loss": 17.7708, "step": 23263 }, { "epoch": 0.4252472261319392, "grad_norm": 7.117066939009308, "learning_rate": 6.4333006434289525e-06, "loss": 17.7574, "step": 23264 }, { "epoch": 0.42526550532838575, "grad_norm": 7.891690304306368, "learning_rate": 6.433017052289963e-06, "loss": 18.013, "step": 23265 }, { "epoch": 0.4252837845248323, "grad_norm": 6.682338327057211, "learning_rate": 6.432733456128371e-06, "loss": 17.53, "step": 23266 }, { "epoch": 0.4253020637212788, "grad_norm": 7.742684286834764, "learning_rate": 6.432449854945174e-06, "loss": 18.009, "step": 23267 }, { "epoch": 0.42532034291772536, "grad_norm": 7.87572476066597, "learning_rate": 6.4321662487413634e-06, "loss": 18.0306, "step": 23268 }, { "epoch": 0.42533862211417184, "grad_norm": 12.092941117262523, "learning_rate": 6.431882637517937e-06, "loss": 18.5198, "step": 23269 }, { "epoch": 0.4253569013106184, "grad_norm": 6.577591419740454, "learning_rate": 6.431599021275885e-06, "loss": 17.8837, "step": 23270 }, { "epoch": 0.4253751805070649, "grad_norm": 5.7233591927392276, "learning_rate": 6.4313154000162e-06, "loss": 17.2162, "step": 23271 }, { "epoch": 0.42539345970351145, "grad_norm": 5.9142626601897765, "learning_rate": 6.431031773739882e-06, "loss": 17.2328, "step": 23272 }, { "epoch": 0.425411738899958, "grad_norm": 6.981238667079544, "learning_rate": 6.430748142447921e-06, "loss": 17.9035, "step": 23273 }, { "epoch": 0.42543001809640446, "grad_norm": 7.6050498953989765, "learning_rate": 6.430464506141312e-06, "loss": 18.0571, "step": 23274 }, { "epoch": 0.425448297292851, "grad_norm": 6.8033061858957655, "learning_rate": 6.430180864821048e-06, "loss": 17.6216, "step": 23275 }, { "epoch": 0.42546657648929753, "grad_norm": 8.662456425833138, "learning_rate": 6.4298972184881255e-06, "loss": 18.2785, "step": 23276 }, { "epoch": 0.42548485568574407, "grad_norm": 5.844497092877724, "learning_rate": 6.4296135671435365e-06, "loss": 17.3414, "step": 23277 }, { "epoch": 0.4255031348821906, "grad_norm": 7.497452862917624, "learning_rate": 6.429329910788276e-06, "loss": 17.8492, "step": 23278 }, { "epoch": 0.4255214140786371, "grad_norm": 7.102280082484249, "learning_rate": 6.429046249423339e-06, "loss": 17.7186, "step": 23279 }, { "epoch": 0.4255396932750836, "grad_norm": 7.334603403675517, "learning_rate": 6.428762583049718e-06, "loss": 17.8792, "step": 23280 }, { "epoch": 0.42555797247153015, "grad_norm": 5.9367013808119005, "learning_rate": 6.428478911668408e-06, "loss": 17.5629, "step": 23281 }, { "epoch": 0.4255762516679767, "grad_norm": 6.119055864556667, "learning_rate": 6.428195235280403e-06, "loss": 17.2329, "step": 23282 }, { "epoch": 0.4255945308644232, "grad_norm": 5.894820412562195, "learning_rate": 6.4279115538867e-06, "loss": 17.3982, "step": 23283 }, { "epoch": 0.4256128100608697, "grad_norm": 6.780257747702837, "learning_rate": 6.427627867488289e-06, "loss": 18.1553, "step": 23284 }, { "epoch": 0.42563108925731624, "grad_norm": 6.77157361870574, "learning_rate": 6.427344176086166e-06, "loss": 17.9649, "step": 23285 }, { "epoch": 0.4256493684537628, "grad_norm": 7.303543310573126, "learning_rate": 6.427060479681326e-06, "loss": 17.8533, "step": 23286 }, { "epoch": 0.4256676476502093, "grad_norm": 6.705371479411704, "learning_rate": 6.426776778274763e-06, "loss": 17.9653, "step": 23287 }, { "epoch": 0.4256859268466558, "grad_norm": 5.539826475065183, "learning_rate": 6.426493071867472e-06, "loss": 17.0326, "step": 23288 }, { "epoch": 0.42570420604310233, "grad_norm": 6.732033702819237, "learning_rate": 6.426209360460445e-06, "loss": 17.5677, "step": 23289 }, { "epoch": 0.42572248523954886, "grad_norm": 5.35049837540552, "learning_rate": 6.425925644054679e-06, "loss": 16.9009, "step": 23290 }, { "epoch": 0.4257407644359954, "grad_norm": 6.842504751471586, "learning_rate": 6.425641922651167e-06, "loss": 17.8086, "step": 23291 }, { "epoch": 0.42575904363244194, "grad_norm": 6.861016117580617, "learning_rate": 6.425358196250904e-06, "loss": 17.7915, "step": 23292 }, { "epoch": 0.4257773228288884, "grad_norm": 5.9472844534362315, "learning_rate": 6.4250744648548835e-06, "loss": 17.2729, "step": 23293 }, { "epoch": 0.42579560202533495, "grad_norm": 5.451052307127618, "learning_rate": 6.4247907284641005e-06, "loss": 17.1485, "step": 23294 }, { "epoch": 0.4258138812217815, "grad_norm": 4.984651170296348, "learning_rate": 6.424506987079551e-06, "loss": 16.9197, "step": 23295 }, { "epoch": 0.425832160418228, "grad_norm": 7.692813012204413, "learning_rate": 6.4242232407022274e-06, "loss": 18.1744, "step": 23296 }, { "epoch": 0.42585043961467456, "grad_norm": 6.741708956330376, "learning_rate": 6.423939489333126e-06, "loss": 17.6719, "step": 23297 }, { "epoch": 0.42586871881112104, "grad_norm": 6.629877763655567, "learning_rate": 6.423655732973237e-06, "loss": 17.5006, "step": 23298 }, { "epoch": 0.4258869980075676, "grad_norm": 5.856492412922344, "learning_rate": 6.423371971623562e-06, "loss": 17.4823, "step": 23299 }, { "epoch": 0.4259052772040141, "grad_norm": 5.609827729623355, "learning_rate": 6.423088205285091e-06, "loss": 16.9767, "step": 23300 }, { "epoch": 0.42592355640046065, "grad_norm": 6.290690897885583, "learning_rate": 6.422804433958818e-06, "loss": 17.642, "step": 23301 }, { "epoch": 0.4259418355969072, "grad_norm": 5.601023951618471, "learning_rate": 6.422520657645739e-06, "loss": 17.2755, "step": 23302 }, { "epoch": 0.42596011479335366, "grad_norm": 6.052452128632869, "learning_rate": 6.422236876346848e-06, "loss": 17.4081, "step": 23303 }, { "epoch": 0.4259783939898002, "grad_norm": 6.388773808185296, "learning_rate": 6.4219530900631425e-06, "loss": 17.6369, "step": 23304 }, { "epoch": 0.42599667318624673, "grad_norm": 5.870620735000374, "learning_rate": 6.421669298795613e-06, "loss": 17.2354, "step": 23305 }, { "epoch": 0.42601495238269327, "grad_norm": 5.390375085752392, "learning_rate": 6.4213855025452565e-06, "loss": 17.1514, "step": 23306 }, { "epoch": 0.4260332315791398, "grad_norm": 5.589035895055516, "learning_rate": 6.421101701313067e-06, "loss": 17.3054, "step": 23307 }, { "epoch": 0.4260515107755863, "grad_norm": 6.425361073746728, "learning_rate": 6.420817895100039e-06, "loss": 17.4956, "step": 23308 }, { "epoch": 0.4260697899720328, "grad_norm": 6.037017043839415, "learning_rate": 6.420534083907169e-06, "loss": 17.4951, "step": 23309 }, { "epoch": 0.42608806916847936, "grad_norm": 6.237122949554329, "learning_rate": 6.4202502677354485e-06, "loss": 17.4349, "step": 23310 }, { "epoch": 0.4261063483649259, "grad_norm": 6.083998987840913, "learning_rate": 6.419966446585875e-06, "loss": 17.135, "step": 23311 }, { "epoch": 0.4261246275613724, "grad_norm": 6.549137200299183, "learning_rate": 6.419682620459442e-06, "loss": 17.6685, "step": 23312 }, { "epoch": 0.4261429067578189, "grad_norm": 7.026041518216922, "learning_rate": 6.419398789357144e-06, "loss": 17.681, "step": 23313 }, { "epoch": 0.42616118595426544, "grad_norm": 8.113965958966368, "learning_rate": 6.419114953279979e-06, "loss": 18.2747, "step": 23314 }, { "epoch": 0.426179465150712, "grad_norm": 5.402074650973205, "learning_rate": 6.418831112228937e-06, "loss": 16.9344, "step": 23315 }, { "epoch": 0.4261977443471585, "grad_norm": 6.291374197533516, "learning_rate": 6.418547266205014e-06, "loss": 17.5045, "step": 23316 }, { "epoch": 0.42621602354360505, "grad_norm": 6.783662187149835, "learning_rate": 6.418263415209207e-06, "loss": 17.9824, "step": 23317 }, { "epoch": 0.42623430274005153, "grad_norm": 6.520332513714922, "learning_rate": 6.417979559242512e-06, "loss": 17.5565, "step": 23318 }, { "epoch": 0.42625258193649807, "grad_norm": 6.2588971858846625, "learning_rate": 6.417695698305919e-06, "loss": 17.3797, "step": 23319 }, { "epoch": 0.4262708611329446, "grad_norm": 6.113436550965818, "learning_rate": 6.417411832400427e-06, "loss": 17.4842, "step": 23320 }, { "epoch": 0.42628914032939114, "grad_norm": 4.839899465251502, "learning_rate": 6.417127961527029e-06, "loss": 16.9643, "step": 23321 }, { "epoch": 0.4263074195258376, "grad_norm": 6.913841491735516, "learning_rate": 6.41684408568672e-06, "loss": 17.7049, "step": 23322 }, { "epoch": 0.42632569872228415, "grad_norm": 6.253215869966576, "learning_rate": 6.4165602048804964e-06, "loss": 17.3477, "step": 23323 }, { "epoch": 0.4263439779187307, "grad_norm": 6.160457300128599, "learning_rate": 6.416276319109351e-06, "loss": 17.4272, "step": 23324 }, { "epoch": 0.4263622571151772, "grad_norm": 6.605787098097573, "learning_rate": 6.415992428374281e-06, "loss": 17.5389, "step": 23325 }, { "epoch": 0.42638053631162376, "grad_norm": 5.904636517213099, "learning_rate": 6.41570853267628e-06, "loss": 17.3321, "step": 23326 }, { "epoch": 0.42639881550807024, "grad_norm": 5.849282282109855, "learning_rate": 6.4154246320163435e-06, "loss": 17.2366, "step": 23327 }, { "epoch": 0.4264170947045168, "grad_norm": 5.530342027558702, "learning_rate": 6.415140726395468e-06, "loss": 17.1125, "step": 23328 }, { "epoch": 0.4264353739009633, "grad_norm": 6.067278157116494, "learning_rate": 6.414856815814645e-06, "loss": 17.5748, "step": 23329 }, { "epoch": 0.42645365309740985, "grad_norm": 8.310868332090848, "learning_rate": 6.414572900274871e-06, "loss": 18.2586, "step": 23330 }, { "epoch": 0.4264719322938564, "grad_norm": 5.3566791051364175, "learning_rate": 6.414288979777145e-06, "loss": 17.2356, "step": 23331 }, { "epoch": 0.42649021149030286, "grad_norm": 5.551640164710231, "learning_rate": 6.414005054322456e-06, "loss": 17.1504, "step": 23332 }, { "epoch": 0.4265084906867494, "grad_norm": 8.486077590578063, "learning_rate": 6.413721123911803e-06, "loss": 17.9259, "step": 23333 }, { "epoch": 0.42652676988319593, "grad_norm": 7.864505064176411, "learning_rate": 6.41343718854618e-06, "loss": 17.8304, "step": 23334 }, { "epoch": 0.42654504907964247, "grad_norm": 6.703655426624963, "learning_rate": 6.413153248226583e-06, "loss": 17.6709, "step": 23335 }, { "epoch": 0.426563328276089, "grad_norm": 6.9711473328119515, "learning_rate": 6.412869302954005e-06, "loss": 16.8692, "step": 23336 }, { "epoch": 0.4265816074725355, "grad_norm": 6.087758990966172, "learning_rate": 6.412585352729443e-06, "loss": 17.3385, "step": 23337 }, { "epoch": 0.426599886668982, "grad_norm": 6.883960524960858, "learning_rate": 6.412301397553893e-06, "loss": 17.7104, "step": 23338 }, { "epoch": 0.42661816586542856, "grad_norm": 5.557755489306266, "learning_rate": 6.412017437428348e-06, "loss": 17.3387, "step": 23339 }, { "epoch": 0.4266364450618751, "grad_norm": 5.728770330626398, "learning_rate": 6.411733472353805e-06, "loss": 17.3812, "step": 23340 }, { "epoch": 0.4266547242583216, "grad_norm": 5.894866565703461, "learning_rate": 6.411449502331258e-06, "loss": 17.2593, "step": 23341 }, { "epoch": 0.4266730034547681, "grad_norm": 7.00927155631741, "learning_rate": 6.411165527361705e-06, "loss": 17.556, "step": 23342 }, { "epoch": 0.42669128265121464, "grad_norm": 7.27315518515396, "learning_rate": 6.410881547446137e-06, "loss": 17.6639, "step": 23343 }, { "epoch": 0.4267095618476612, "grad_norm": 7.228428613284711, "learning_rate": 6.410597562585552e-06, "loss": 18.053, "step": 23344 }, { "epoch": 0.4267278410441077, "grad_norm": 6.687136055615292, "learning_rate": 6.4103135727809465e-06, "loss": 17.5049, "step": 23345 }, { "epoch": 0.42674612024055425, "grad_norm": 6.764895121356559, "learning_rate": 6.410029578033313e-06, "loss": 17.7955, "step": 23346 }, { "epoch": 0.42676439943700073, "grad_norm": 6.520786108739674, "learning_rate": 6.4097455783436495e-06, "loss": 17.4612, "step": 23347 }, { "epoch": 0.42678267863344727, "grad_norm": 8.915873416299993, "learning_rate": 6.409461573712947e-06, "loss": 18.2511, "step": 23348 }, { "epoch": 0.4268009578298938, "grad_norm": 6.260238044671665, "learning_rate": 6.409177564142207e-06, "loss": 17.3599, "step": 23349 }, { "epoch": 0.42681923702634034, "grad_norm": 8.116313773124174, "learning_rate": 6.408893549632421e-06, "loss": 18.0807, "step": 23350 }, { "epoch": 0.4268375162227869, "grad_norm": 5.993751166289965, "learning_rate": 6.408609530184585e-06, "loss": 17.2234, "step": 23351 }, { "epoch": 0.42685579541923335, "grad_norm": 6.319804179162196, "learning_rate": 6.4083255057996954e-06, "loss": 17.6066, "step": 23352 }, { "epoch": 0.4268740746156799, "grad_norm": 6.584059094697491, "learning_rate": 6.408041476478747e-06, "loss": 17.5112, "step": 23353 }, { "epoch": 0.4268923538121264, "grad_norm": 6.5244990030763494, "learning_rate": 6.407757442222735e-06, "loss": 17.6516, "step": 23354 }, { "epoch": 0.42691063300857296, "grad_norm": 5.883039291502872, "learning_rate": 6.407473403032656e-06, "loss": 17.0701, "step": 23355 }, { "epoch": 0.42692891220501944, "grad_norm": 6.736911400804352, "learning_rate": 6.407189358909505e-06, "loss": 17.6317, "step": 23356 }, { "epoch": 0.426947191401466, "grad_norm": 7.641623068033282, "learning_rate": 6.406905309854275e-06, "loss": 17.6438, "step": 23357 }, { "epoch": 0.4269654705979125, "grad_norm": 6.224456284307733, "learning_rate": 6.406621255867966e-06, "loss": 17.3261, "step": 23358 }, { "epoch": 0.42698374979435905, "grad_norm": 5.5787801716904655, "learning_rate": 6.406337196951573e-06, "loss": 17.2173, "step": 23359 }, { "epoch": 0.4270020289908056, "grad_norm": 6.001649194149242, "learning_rate": 6.406053133106088e-06, "loss": 17.1758, "step": 23360 }, { "epoch": 0.42702030818725206, "grad_norm": 6.336602194065833, "learning_rate": 6.40576906433251e-06, "loss": 17.4226, "step": 23361 }, { "epoch": 0.4270385873836986, "grad_norm": 5.5915735460026985, "learning_rate": 6.405484990631831e-06, "loss": 17.441, "step": 23362 }, { "epoch": 0.42705686658014513, "grad_norm": 5.60167724582844, "learning_rate": 6.405200912005052e-06, "loss": 17.2534, "step": 23363 }, { "epoch": 0.42707514577659167, "grad_norm": 6.76547289832025, "learning_rate": 6.404916828453165e-06, "loss": 17.6248, "step": 23364 }, { "epoch": 0.4270934249730382, "grad_norm": 6.751197872198745, "learning_rate": 6.404632739977166e-06, "loss": 17.728, "step": 23365 }, { "epoch": 0.4271117041694847, "grad_norm": 5.4264654123146085, "learning_rate": 6.40434864657805e-06, "loss": 17.008, "step": 23366 }, { "epoch": 0.4271299833659312, "grad_norm": 5.563911891125588, "learning_rate": 6.404064548256815e-06, "loss": 17.1815, "step": 23367 }, { "epoch": 0.42714826256237776, "grad_norm": 7.417629212616718, "learning_rate": 6.403780445014456e-06, "loss": 17.7103, "step": 23368 }, { "epoch": 0.4271665417588243, "grad_norm": 6.1007423515647, "learning_rate": 6.403496336851969e-06, "loss": 17.426, "step": 23369 }, { "epoch": 0.4271848209552708, "grad_norm": 6.109197967976358, "learning_rate": 6.403212223770348e-06, "loss": 17.6285, "step": 23370 }, { "epoch": 0.4272031001517173, "grad_norm": 6.768557816439791, "learning_rate": 6.402928105770588e-06, "loss": 17.5708, "step": 23371 }, { "epoch": 0.42722137934816384, "grad_norm": 8.242561548441238, "learning_rate": 6.402643982853689e-06, "loss": 17.5663, "step": 23372 }, { "epoch": 0.4272396585446104, "grad_norm": 6.9426266384781385, "learning_rate": 6.402359855020645e-06, "loss": 17.9845, "step": 23373 }, { "epoch": 0.4272579377410569, "grad_norm": 6.324919899163823, "learning_rate": 6.402075722272451e-06, "loss": 17.673, "step": 23374 }, { "epoch": 0.42727621693750345, "grad_norm": 5.840632425260436, "learning_rate": 6.401791584610103e-06, "loss": 17.3264, "step": 23375 }, { "epoch": 0.42729449613394993, "grad_norm": 5.941959213094946, "learning_rate": 6.401507442034597e-06, "loss": 17.3341, "step": 23376 }, { "epoch": 0.42731277533039647, "grad_norm": 5.083564261165598, "learning_rate": 6.401223294546929e-06, "loss": 17.1065, "step": 23377 }, { "epoch": 0.427331054526843, "grad_norm": 7.467611811641787, "learning_rate": 6.400939142148095e-06, "loss": 18.23, "step": 23378 }, { "epoch": 0.42734933372328954, "grad_norm": 5.464582705786741, "learning_rate": 6.400654984839091e-06, "loss": 17.0183, "step": 23379 }, { "epoch": 0.4273676129197361, "grad_norm": 7.229755104089995, "learning_rate": 6.4003708226209116e-06, "loss": 18.0654, "step": 23380 }, { "epoch": 0.42738589211618255, "grad_norm": 6.632513083793697, "learning_rate": 6.400086655494555e-06, "loss": 18.0036, "step": 23381 }, { "epoch": 0.4274041713126291, "grad_norm": 7.4728082978196815, "learning_rate": 6.399802483461017e-06, "loss": 17.9919, "step": 23382 }, { "epoch": 0.4274224505090756, "grad_norm": 7.787181386131671, "learning_rate": 6.399518306521293e-06, "loss": 17.7279, "step": 23383 }, { "epoch": 0.42744072970552216, "grad_norm": 5.880190777567864, "learning_rate": 6.399234124676376e-06, "loss": 17.3932, "step": 23384 }, { "epoch": 0.4274590089019687, "grad_norm": 6.543591168829177, "learning_rate": 6.3989499379272665e-06, "loss": 17.7307, "step": 23385 }, { "epoch": 0.4274772880984152, "grad_norm": 7.560626540131132, "learning_rate": 6.398665746274959e-06, "loss": 18.2747, "step": 23386 }, { "epoch": 0.4274955672948617, "grad_norm": 7.082023980148686, "learning_rate": 6.398381549720452e-06, "loss": 17.8332, "step": 23387 }, { "epoch": 0.42751384649130825, "grad_norm": 7.786927866663507, "learning_rate": 6.398097348264736e-06, "loss": 18.5009, "step": 23388 }, { "epoch": 0.4275321256877548, "grad_norm": 5.70308358439869, "learning_rate": 6.397813141908809e-06, "loss": 17.2726, "step": 23389 }, { "epoch": 0.42755040488420126, "grad_norm": 5.99044632759544, "learning_rate": 6.3975289306536704e-06, "loss": 17.4928, "step": 23390 }, { "epoch": 0.4275686840806478, "grad_norm": 6.975416516493688, "learning_rate": 6.397244714500313e-06, "loss": 17.9719, "step": 23391 }, { "epoch": 0.42758696327709433, "grad_norm": 6.339173005272153, "learning_rate": 6.396960493449735e-06, "loss": 17.4774, "step": 23392 }, { "epoch": 0.42760524247354087, "grad_norm": 6.574409026294816, "learning_rate": 6.396676267502931e-06, "loss": 17.6055, "step": 23393 }, { "epoch": 0.4276235216699874, "grad_norm": 7.330237821050877, "learning_rate": 6.396392036660899e-06, "loss": 17.5929, "step": 23394 }, { "epoch": 0.4276418008664339, "grad_norm": 12.230810864601223, "learning_rate": 6.396107800924634e-06, "loss": 18.3993, "step": 23395 }, { "epoch": 0.4276600800628804, "grad_norm": 7.356162325461706, "learning_rate": 6.395823560295131e-06, "loss": 17.8599, "step": 23396 }, { "epoch": 0.42767835925932696, "grad_norm": 6.247100254242655, "learning_rate": 6.3955393147733895e-06, "loss": 17.4883, "step": 23397 }, { "epoch": 0.4276966384557735, "grad_norm": 6.020560638533876, "learning_rate": 6.395255064360401e-06, "loss": 17.5034, "step": 23398 }, { "epoch": 0.42771491765222003, "grad_norm": 5.884288937345549, "learning_rate": 6.394970809057166e-06, "loss": 17.1348, "step": 23399 }, { "epoch": 0.4277331968486665, "grad_norm": 5.84602641285627, "learning_rate": 6.394686548864681e-06, "loss": 17.1399, "step": 23400 }, { "epoch": 0.42775147604511304, "grad_norm": 7.170843289748093, "learning_rate": 6.394402283783938e-06, "loss": 17.8178, "step": 23401 }, { "epoch": 0.4277697552415596, "grad_norm": 6.12561822244916, "learning_rate": 6.394118013815938e-06, "loss": 17.493, "step": 23402 }, { "epoch": 0.4277880344380061, "grad_norm": 7.458365081773975, "learning_rate": 6.393833738961672e-06, "loss": 17.832, "step": 23403 }, { "epoch": 0.42780631363445265, "grad_norm": 5.824320554959616, "learning_rate": 6.3935494592221435e-06, "loss": 17.1904, "step": 23404 }, { "epoch": 0.42782459283089913, "grad_norm": 6.063267217156814, "learning_rate": 6.3932651745983444e-06, "loss": 17.5405, "step": 23405 }, { "epoch": 0.42784287202734567, "grad_norm": 7.899359840208344, "learning_rate": 6.39298088509127e-06, "loss": 17.8857, "step": 23406 }, { "epoch": 0.4278611512237922, "grad_norm": 5.7405543562282615, "learning_rate": 6.3926965907019205e-06, "loss": 17.3189, "step": 23407 }, { "epoch": 0.42787943042023874, "grad_norm": 5.869797000359069, "learning_rate": 6.3924122914312895e-06, "loss": 17.2573, "step": 23408 }, { "epoch": 0.4278977096166853, "grad_norm": 5.407481655789767, "learning_rate": 6.392127987280373e-06, "loss": 17.1029, "step": 23409 }, { "epoch": 0.42791598881313175, "grad_norm": 5.415827292402781, "learning_rate": 6.39184367825017e-06, "loss": 17.3705, "step": 23410 }, { "epoch": 0.4279342680095783, "grad_norm": 6.435835641604202, "learning_rate": 6.391559364341675e-06, "loss": 17.6047, "step": 23411 }, { "epoch": 0.4279525472060248, "grad_norm": 6.857008590435317, "learning_rate": 6.391275045555886e-06, "loss": 18.0624, "step": 23412 }, { "epoch": 0.42797082640247136, "grad_norm": 6.466399128208936, "learning_rate": 6.3909907218937985e-06, "loss": 17.6225, "step": 23413 }, { "epoch": 0.4279891055989179, "grad_norm": 5.721083240196607, "learning_rate": 6.39070639335641e-06, "loss": 17.2616, "step": 23414 }, { "epoch": 0.4280073847953644, "grad_norm": 7.764525729143078, "learning_rate": 6.390422059944716e-06, "loss": 17.9437, "step": 23415 }, { "epoch": 0.4280256639918109, "grad_norm": 7.267272213174589, "learning_rate": 6.390137721659711e-06, "loss": 17.7327, "step": 23416 }, { "epoch": 0.42804394318825745, "grad_norm": 5.616225426549465, "learning_rate": 6.389853378502395e-06, "loss": 17.248, "step": 23417 }, { "epoch": 0.428062222384704, "grad_norm": 6.208328680412768, "learning_rate": 6.389569030473765e-06, "loss": 17.5197, "step": 23418 }, { "epoch": 0.4280805015811505, "grad_norm": 6.186402087249811, "learning_rate": 6.389284677574815e-06, "loss": 17.6012, "step": 23419 }, { "epoch": 0.428098780777597, "grad_norm": 5.891358819072858, "learning_rate": 6.389000319806543e-06, "loss": 17.622, "step": 23420 }, { "epoch": 0.42811705997404353, "grad_norm": 6.162886707761934, "learning_rate": 6.388715957169947e-06, "loss": 17.5174, "step": 23421 }, { "epoch": 0.42813533917049007, "grad_norm": 5.778054424604584, "learning_rate": 6.38843158966602e-06, "loss": 17.3119, "step": 23422 }, { "epoch": 0.4281536183669366, "grad_norm": 6.824577888808088, "learning_rate": 6.3881472172957606e-06, "loss": 17.694, "step": 23423 }, { "epoch": 0.4281718975633831, "grad_norm": 7.722819229871561, "learning_rate": 6.387862840060166e-06, "loss": 17.8384, "step": 23424 }, { "epoch": 0.4281901767598296, "grad_norm": 5.206806597121236, "learning_rate": 6.387578457960233e-06, "loss": 16.9836, "step": 23425 }, { "epoch": 0.42820845595627616, "grad_norm": 6.920501065603111, "learning_rate": 6.3872940709969575e-06, "loss": 17.7597, "step": 23426 }, { "epoch": 0.4282267351527227, "grad_norm": 6.899161944954252, "learning_rate": 6.387009679171336e-06, "loss": 17.8887, "step": 23427 }, { "epoch": 0.42824501434916923, "grad_norm": 7.294812669355296, "learning_rate": 6.386725282484369e-06, "loss": 18.0346, "step": 23428 }, { "epoch": 0.4282632935456157, "grad_norm": 6.217190853006324, "learning_rate": 6.3864408809370484e-06, "loss": 17.4678, "step": 23429 }, { "epoch": 0.42828157274206224, "grad_norm": 6.162584665430932, "learning_rate": 6.386156474530372e-06, "loss": 17.5032, "step": 23430 }, { "epoch": 0.4282998519385088, "grad_norm": 6.444946592469472, "learning_rate": 6.385872063265338e-06, "loss": 17.6911, "step": 23431 }, { "epoch": 0.4283181311349553, "grad_norm": 7.7693908600627095, "learning_rate": 6.385587647142944e-06, "loss": 17.8688, "step": 23432 }, { "epoch": 0.42833641033140185, "grad_norm": 5.589450438592937, "learning_rate": 6.385303226164183e-06, "loss": 17.3504, "step": 23433 }, { "epoch": 0.42835468952784833, "grad_norm": 6.525097342409039, "learning_rate": 6.385018800330056e-06, "loss": 17.321, "step": 23434 }, { "epoch": 0.42837296872429487, "grad_norm": 6.754817938260677, "learning_rate": 6.384734369641558e-06, "loss": 17.5407, "step": 23435 }, { "epoch": 0.4283912479207414, "grad_norm": 5.851656400971432, "learning_rate": 6.384449934099686e-06, "loss": 17.4082, "step": 23436 }, { "epoch": 0.42840952711718794, "grad_norm": 6.852300756559189, "learning_rate": 6.384165493705437e-06, "loss": 17.7443, "step": 23437 }, { "epoch": 0.4284278063136345, "grad_norm": 7.375661965669248, "learning_rate": 6.383881048459808e-06, "loss": 18.0115, "step": 23438 }, { "epoch": 0.42844608551008095, "grad_norm": 5.550086568877446, "learning_rate": 6.383596598363796e-06, "loss": 17.2862, "step": 23439 }, { "epoch": 0.4284643647065275, "grad_norm": 5.993033378286841, "learning_rate": 6.383312143418399e-06, "loss": 17.3555, "step": 23440 }, { "epoch": 0.428482643902974, "grad_norm": 5.820840740675721, "learning_rate": 6.383027683624612e-06, "loss": 17.1568, "step": 23441 }, { "epoch": 0.42850092309942056, "grad_norm": 8.607797647673888, "learning_rate": 6.382743218983434e-06, "loss": 18.587, "step": 23442 }, { "epoch": 0.4285192022958671, "grad_norm": 5.81046941666761, "learning_rate": 6.382458749495859e-06, "loss": 17.13, "step": 23443 }, { "epoch": 0.4285374814923136, "grad_norm": 7.615414587016948, "learning_rate": 6.382174275162887e-06, "loss": 17.9039, "step": 23444 }, { "epoch": 0.4285557606887601, "grad_norm": 6.909462525828415, "learning_rate": 6.381889795985515e-06, "loss": 17.6704, "step": 23445 }, { "epoch": 0.42857403988520665, "grad_norm": 5.694158058507743, "learning_rate": 6.3816053119647395e-06, "loss": 17.2406, "step": 23446 }, { "epoch": 0.4285923190816532, "grad_norm": 6.002936666070136, "learning_rate": 6.381320823101556e-06, "loss": 17.4432, "step": 23447 }, { "epoch": 0.4286105982780997, "grad_norm": 6.189362379272837, "learning_rate": 6.3810363293969615e-06, "loss": 17.3398, "step": 23448 }, { "epoch": 0.4286288774745462, "grad_norm": 5.539747891510892, "learning_rate": 6.3807518308519575e-06, "loss": 17.2422, "step": 23449 }, { "epoch": 0.42864715667099273, "grad_norm": 6.804636414467735, "learning_rate": 6.380467327467537e-06, "loss": 17.7654, "step": 23450 }, { "epoch": 0.42866543586743927, "grad_norm": 7.243815335154215, "learning_rate": 6.380182819244698e-06, "loss": 17.8922, "step": 23451 }, { "epoch": 0.4286837150638858, "grad_norm": 6.036758226415079, "learning_rate": 6.379898306184438e-06, "loss": 17.4417, "step": 23452 }, { "epoch": 0.42870199426033234, "grad_norm": 6.990790364334782, "learning_rate": 6.379613788287754e-06, "loss": 18.0702, "step": 23453 }, { "epoch": 0.4287202734567788, "grad_norm": 6.70366083248654, "learning_rate": 6.379329265555644e-06, "loss": 17.7388, "step": 23454 }, { "epoch": 0.42873855265322536, "grad_norm": 5.785569135151232, "learning_rate": 6.379044737989104e-06, "loss": 17.183, "step": 23455 }, { "epoch": 0.4287568318496719, "grad_norm": 6.619216894772767, "learning_rate": 6.378760205589134e-06, "loss": 17.9898, "step": 23456 }, { "epoch": 0.42877511104611843, "grad_norm": 6.623487871309558, "learning_rate": 6.3784756683567265e-06, "loss": 17.5719, "step": 23457 }, { "epoch": 0.42879339024256496, "grad_norm": 7.345545332429029, "learning_rate": 6.378191126292881e-06, "loss": 17.6108, "step": 23458 }, { "epoch": 0.42881166943901144, "grad_norm": 6.003816686915262, "learning_rate": 6.377906579398598e-06, "loss": 17.4517, "step": 23459 }, { "epoch": 0.428829948635458, "grad_norm": 6.564628632479459, "learning_rate": 6.37762202767487e-06, "loss": 17.5803, "step": 23460 }, { "epoch": 0.4288482278319045, "grad_norm": 5.4720816589631625, "learning_rate": 6.377337471122698e-06, "loss": 17.1215, "step": 23461 }, { "epoch": 0.42886650702835105, "grad_norm": 6.655273116118058, "learning_rate": 6.377052909743075e-06, "loss": 17.5229, "step": 23462 }, { "epoch": 0.42888478622479753, "grad_norm": 6.984137184578219, "learning_rate": 6.376768343537003e-06, "loss": 17.6818, "step": 23463 }, { "epoch": 0.42890306542124407, "grad_norm": 6.047080046835342, "learning_rate": 6.376483772505477e-06, "loss": 17.4843, "step": 23464 }, { "epoch": 0.4289213446176906, "grad_norm": 5.805838726085733, "learning_rate": 6.376199196649494e-06, "loss": 17.2506, "step": 23465 }, { "epoch": 0.42893962381413714, "grad_norm": 7.52425932776498, "learning_rate": 6.375914615970054e-06, "loss": 18.106, "step": 23466 }, { "epoch": 0.4289579030105837, "grad_norm": 5.908963140598852, "learning_rate": 6.37563003046815e-06, "loss": 17.5495, "step": 23467 }, { "epoch": 0.42897618220703015, "grad_norm": 6.53915540978699, "learning_rate": 6.3753454401447845e-06, "loss": 17.4231, "step": 23468 }, { "epoch": 0.4289944614034767, "grad_norm": 6.07557074565523, "learning_rate": 6.375060845000953e-06, "loss": 17.3704, "step": 23469 }, { "epoch": 0.4290127405999232, "grad_norm": 5.44013386682845, "learning_rate": 6.37477624503765e-06, "loss": 17.157, "step": 23470 }, { "epoch": 0.42903101979636976, "grad_norm": 8.099400722174494, "learning_rate": 6.3744916402558775e-06, "loss": 18.2346, "step": 23471 }, { "epoch": 0.4290492989928163, "grad_norm": 6.548503239278486, "learning_rate": 6.37420703065663e-06, "loss": 17.7201, "step": 23472 }, { "epoch": 0.4290675781892628, "grad_norm": 6.781953401474344, "learning_rate": 6.373922416240907e-06, "loss": 17.6232, "step": 23473 }, { "epoch": 0.4290858573857093, "grad_norm": 5.94086083175987, "learning_rate": 6.373637797009706e-06, "loss": 17.1802, "step": 23474 }, { "epoch": 0.42910413658215585, "grad_norm": 6.625007104623523, "learning_rate": 6.373353172964021e-06, "loss": 17.666, "step": 23475 }, { "epoch": 0.4291224157786024, "grad_norm": 7.0803730896948105, "learning_rate": 6.3730685441048545e-06, "loss": 17.8875, "step": 23476 }, { "epoch": 0.4291406949750489, "grad_norm": 7.0384334182787285, "learning_rate": 6.372783910433202e-06, "loss": 17.8789, "step": 23477 }, { "epoch": 0.4291589741714954, "grad_norm": 6.710562453004961, "learning_rate": 6.37249927195006e-06, "loss": 17.7144, "step": 23478 }, { "epoch": 0.42917725336794194, "grad_norm": 7.008047150991366, "learning_rate": 6.372214628656427e-06, "loss": 17.8009, "step": 23479 }, { "epoch": 0.42919553256438847, "grad_norm": 5.735284382354732, "learning_rate": 6.371929980553302e-06, "loss": 17.407, "step": 23480 }, { "epoch": 0.429213811760835, "grad_norm": 6.266421602105894, "learning_rate": 6.37164532764168e-06, "loss": 17.5883, "step": 23481 }, { "epoch": 0.42923209095728154, "grad_norm": 6.604179934218001, "learning_rate": 6.37136066992256e-06, "loss": 17.4246, "step": 23482 }, { "epoch": 0.429250370153728, "grad_norm": 6.678183162723747, "learning_rate": 6.371076007396942e-06, "loss": 17.809, "step": 23483 }, { "epoch": 0.42926864935017456, "grad_norm": 6.645304757768106, "learning_rate": 6.370791340065819e-06, "loss": 17.4831, "step": 23484 }, { "epoch": 0.4292869285466211, "grad_norm": 7.229561117047641, "learning_rate": 6.370506667930193e-06, "loss": 17.8044, "step": 23485 }, { "epoch": 0.42930520774306763, "grad_norm": 7.211862745666257, "learning_rate": 6.370221990991059e-06, "loss": 17.7365, "step": 23486 }, { "epoch": 0.42932348693951417, "grad_norm": 7.6654712734821855, "learning_rate": 6.3699373092494185e-06, "loss": 17.8216, "step": 23487 }, { "epoch": 0.42934176613596065, "grad_norm": 6.927591734126307, "learning_rate": 6.369652622706264e-06, "loss": 17.8755, "step": 23488 }, { "epoch": 0.4293600453324072, "grad_norm": 5.89864838012858, "learning_rate": 6.3693679313625955e-06, "loss": 17.4478, "step": 23489 }, { "epoch": 0.4293783245288537, "grad_norm": 9.105643662289777, "learning_rate": 6.369083235219413e-06, "loss": 18.2581, "step": 23490 }, { "epoch": 0.42939660372530025, "grad_norm": 6.688581317154519, "learning_rate": 6.3687985342777115e-06, "loss": 17.6537, "step": 23491 }, { "epoch": 0.4294148829217468, "grad_norm": 6.440538128411903, "learning_rate": 6.368513828538491e-06, "loss": 17.3301, "step": 23492 }, { "epoch": 0.42943316211819327, "grad_norm": 6.289178477966556, "learning_rate": 6.368229118002746e-06, "loss": 17.5724, "step": 23493 }, { "epoch": 0.4294514413146398, "grad_norm": 5.8884013131330795, "learning_rate": 6.367944402671479e-06, "loss": 17.3068, "step": 23494 }, { "epoch": 0.42946972051108634, "grad_norm": 7.201165273310998, "learning_rate": 6.367659682545685e-06, "loss": 18.0724, "step": 23495 }, { "epoch": 0.4294879997075329, "grad_norm": 6.013576105633485, "learning_rate": 6.367374957626362e-06, "loss": 17.3887, "step": 23496 }, { "epoch": 0.42950627890397935, "grad_norm": 5.994183588812937, "learning_rate": 6.36709022791451e-06, "loss": 17.4696, "step": 23497 }, { "epoch": 0.4295245581004259, "grad_norm": 5.918961348027402, "learning_rate": 6.366805493411122e-06, "loss": 17.4055, "step": 23498 }, { "epoch": 0.4295428372968724, "grad_norm": 6.664741140326103, "learning_rate": 6.366520754117201e-06, "loss": 17.3546, "step": 23499 }, { "epoch": 0.42956111649331896, "grad_norm": 6.489303887878604, "learning_rate": 6.366236010033745e-06, "loss": 17.6144, "step": 23500 }, { "epoch": 0.4295793956897655, "grad_norm": 6.184314106609628, "learning_rate": 6.365951261161749e-06, "loss": 17.7604, "step": 23501 }, { "epoch": 0.429597674886212, "grad_norm": 7.7819691350396845, "learning_rate": 6.365666507502213e-06, "loss": 18.0776, "step": 23502 }, { "epoch": 0.4296159540826585, "grad_norm": 7.812641912064223, "learning_rate": 6.365381749056132e-06, "loss": 17.8831, "step": 23503 }, { "epoch": 0.42963423327910505, "grad_norm": 6.9714881228046695, "learning_rate": 6.365096985824509e-06, "loss": 17.7983, "step": 23504 }, { "epoch": 0.4296525124755516, "grad_norm": 5.83690403922629, "learning_rate": 6.364812217808339e-06, "loss": 17.2189, "step": 23505 }, { "epoch": 0.4296707916719981, "grad_norm": 8.227452114088974, "learning_rate": 6.36452744500862e-06, "loss": 18.3677, "step": 23506 }, { "epoch": 0.4296890708684446, "grad_norm": 5.292856627022733, "learning_rate": 6.364242667426351e-06, "loss": 17.0268, "step": 23507 }, { "epoch": 0.42970735006489114, "grad_norm": 7.44315320327994, "learning_rate": 6.3639578850625305e-06, "loss": 18.1939, "step": 23508 }, { "epoch": 0.42972562926133767, "grad_norm": 8.495513373816406, "learning_rate": 6.363673097918155e-06, "loss": 18.83, "step": 23509 }, { "epoch": 0.4297439084577842, "grad_norm": 5.843429288037855, "learning_rate": 6.3633883059942246e-06, "loss": 17.4012, "step": 23510 }, { "epoch": 0.42976218765423074, "grad_norm": 5.653508601303307, "learning_rate": 6.363103509291735e-06, "loss": 17.3988, "step": 23511 }, { "epoch": 0.4297804668506772, "grad_norm": 5.9362537578750905, "learning_rate": 6.362818707811687e-06, "loss": 17.495, "step": 23512 }, { "epoch": 0.42979874604712376, "grad_norm": 5.418770510451064, "learning_rate": 6.362533901555078e-06, "loss": 17.0801, "step": 23513 }, { "epoch": 0.4298170252435703, "grad_norm": 7.009895044952832, "learning_rate": 6.362249090522906e-06, "loss": 17.8629, "step": 23514 }, { "epoch": 0.42983530444001683, "grad_norm": 7.28416435772863, "learning_rate": 6.361964274716168e-06, "loss": 18.1369, "step": 23515 }, { "epoch": 0.42985358363646337, "grad_norm": 6.379897454566321, "learning_rate": 6.361679454135863e-06, "loss": 17.4548, "step": 23516 }, { "epoch": 0.42987186283290985, "grad_norm": 6.540674653398182, "learning_rate": 6.361394628782991e-06, "loss": 17.6663, "step": 23517 }, { "epoch": 0.4298901420293564, "grad_norm": 6.393240692581243, "learning_rate": 6.361109798658549e-06, "loss": 17.5582, "step": 23518 }, { "epoch": 0.4299084212258029, "grad_norm": 5.943618125336763, "learning_rate": 6.360824963763535e-06, "loss": 17.4567, "step": 23519 }, { "epoch": 0.42992670042224945, "grad_norm": 5.73368450021337, "learning_rate": 6.3605401240989485e-06, "loss": 17.2038, "step": 23520 }, { "epoch": 0.429944979618696, "grad_norm": 6.206420853309497, "learning_rate": 6.360255279665785e-06, "loss": 17.5609, "step": 23521 }, { "epoch": 0.42996325881514247, "grad_norm": 7.4257023033097305, "learning_rate": 6.359970430465045e-06, "loss": 18.1641, "step": 23522 }, { "epoch": 0.429981538011589, "grad_norm": 6.678247074651477, "learning_rate": 6.359685576497727e-06, "loss": 17.5187, "step": 23523 }, { "epoch": 0.42999981720803554, "grad_norm": 5.184782187513316, "learning_rate": 6.35940071776483e-06, "loss": 17.003, "step": 23524 }, { "epoch": 0.4300180964044821, "grad_norm": 6.271490413621714, "learning_rate": 6.359115854267351e-06, "loss": 17.4747, "step": 23525 }, { "epoch": 0.4300363756009286, "grad_norm": 7.708380800894812, "learning_rate": 6.358830986006288e-06, "loss": 17.8523, "step": 23526 }, { "epoch": 0.4300546547973751, "grad_norm": 6.687626777871401, "learning_rate": 6.358546112982642e-06, "loss": 17.3331, "step": 23527 }, { "epoch": 0.4300729339938216, "grad_norm": 6.3734659973425885, "learning_rate": 6.358261235197409e-06, "loss": 17.7526, "step": 23528 }, { "epoch": 0.43009121319026816, "grad_norm": 6.961532529100626, "learning_rate": 6.357976352651588e-06, "loss": 18.1992, "step": 23529 }, { "epoch": 0.4301094923867147, "grad_norm": 7.342318940314222, "learning_rate": 6.357691465346176e-06, "loss": 17.9878, "step": 23530 }, { "epoch": 0.4301277715831612, "grad_norm": 4.868647472607592, "learning_rate": 6.357406573282177e-06, "loss": 16.9925, "step": 23531 }, { "epoch": 0.4301460507796077, "grad_norm": 7.720392733134378, "learning_rate": 6.3571216764605834e-06, "loss": 18.1204, "step": 23532 }, { "epoch": 0.43016432997605425, "grad_norm": 6.0582095173374935, "learning_rate": 6.356836774882395e-06, "loss": 17.3617, "step": 23533 }, { "epoch": 0.4301826091725008, "grad_norm": 6.294570966416092, "learning_rate": 6.356551868548614e-06, "loss": 17.5776, "step": 23534 }, { "epoch": 0.4302008883689473, "grad_norm": 6.149519870982751, "learning_rate": 6.356266957460235e-06, "loss": 17.7005, "step": 23535 }, { "epoch": 0.4302191675653938, "grad_norm": 6.666495115279745, "learning_rate": 6.355982041618258e-06, "loss": 17.6561, "step": 23536 }, { "epoch": 0.43023744676184034, "grad_norm": 5.5696277194670225, "learning_rate": 6.355697121023681e-06, "loss": 17.2011, "step": 23537 }, { "epoch": 0.43025572595828687, "grad_norm": 6.57652460180183, "learning_rate": 6.355412195677505e-06, "loss": 17.9103, "step": 23538 }, { "epoch": 0.4302740051547334, "grad_norm": 5.353515828827387, "learning_rate": 6.355127265580726e-06, "loss": 17.2027, "step": 23539 }, { "epoch": 0.43029228435117994, "grad_norm": 6.7153313542503605, "learning_rate": 6.354842330734343e-06, "loss": 17.6947, "step": 23540 }, { "epoch": 0.4303105635476264, "grad_norm": 6.61578978784584, "learning_rate": 6.354557391139356e-06, "loss": 17.4198, "step": 23541 }, { "epoch": 0.43032884274407296, "grad_norm": 7.698702025652604, "learning_rate": 6.354272446796763e-06, "loss": 18.6244, "step": 23542 }, { "epoch": 0.4303471219405195, "grad_norm": 5.020434350955654, "learning_rate": 6.353987497707561e-06, "loss": 17.1536, "step": 23543 }, { "epoch": 0.43036540113696603, "grad_norm": 6.580445026362194, "learning_rate": 6.353702543872752e-06, "loss": 17.8923, "step": 23544 }, { "epoch": 0.43038368033341257, "grad_norm": 6.9063397978375365, "learning_rate": 6.353417585293333e-06, "loss": 17.7039, "step": 23545 }, { "epoch": 0.43040195952985905, "grad_norm": 5.466611397350951, "learning_rate": 6.353132621970302e-06, "loss": 17.0673, "step": 23546 }, { "epoch": 0.4304202387263056, "grad_norm": 6.117090436525762, "learning_rate": 6.352847653904659e-06, "loss": 17.4691, "step": 23547 }, { "epoch": 0.4304385179227521, "grad_norm": 8.191070441242521, "learning_rate": 6.352562681097402e-06, "loss": 18.4792, "step": 23548 }, { "epoch": 0.43045679711919865, "grad_norm": 5.184905925924164, "learning_rate": 6.352277703549532e-06, "loss": 17.1745, "step": 23549 }, { "epoch": 0.4304750763156452, "grad_norm": 6.102763577036956, "learning_rate": 6.351992721262044e-06, "loss": 17.6115, "step": 23550 }, { "epoch": 0.43049335551209167, "grad_norm": 7.700583774581458, "learning_rate": 6.351707734235939e-06, "loss": 17.936, "step": 23551 }, { "epoch": 0.4305116347085382, "grad_norm": 6.013999561214821, "learning_rate": 6.351422742472215e-06, "loss": 17.4293, "step": 23552 }, { "epoch": 0.43052991390498474, "grad_norm": 6.4355427076619325, "learning_rate": 6.351137745971874e-06, "loss": 17.4397, "step": 23553 }, { "epoch": 0.4305481931014313, "grad_norm": 6.155661478665161, "learning_rate": 6.35085274473591e-06, "loss": 17.3839, "step": 23554 }, { "epoch": 0.4305664722978778, "grad_norm": 6.448929461968095, "learning_rate": 6.350567738765325e-06, "loss": 17.6711, "step": 23555 }, { "epoch": 0.4305847514943243, "grad_norm": 5.863524683486678, "learning_rate": 6.350282728061119e-06, "loss": 17.3323, "step": 23556 }, { "epoch": 0.4306030306907708, "grad_norm": 6.371609691187254, "learning_rate": 6.349997712624287e-06, "loss": 17.302, "step": 23557 }, { "epoch": 0.43062130988721736, "grad_norm": 7.076900386101137, "learning_rate": 6.34971269245583e-06, "loss": 17.5653, "step": 23558 }, { "epoch": 0.4306395890836639, "grad_norm": 7.148225022313409, "learning_rate": 6.34942766755675e-06, "loss": 17.9312, "step": 23559 }, { "epoch": 0.43065786828011043, "grad_norm": 7.5029938852357025, "learning_rate": 6.349142637928041e-06, "loss": 17.7849, "step": 23560 }, { "epoch": 0.4306761474765569, "grad_norm": 5.972651678416425, "learning_rate": 6.348857603570704e-06, "loss": 17.5773, "step": 23561 }, { "epoch": 0.43069442667300345, "grad_norm": 6.354691840650409, "learning_rate": 6.3485725644857375e-06, "loss": 17.5921, "step": 23562 }, { "epoch": 0.43071270586945, "grad_norm": 7.019444950097048, "learning_rate": 6.348287520674144e-06, "loss": 17.8997, "step": 23563 }, { "epoch": 0.4307309850658965, "grad_norm": 5.534087623630858, "learning_rate": 6.3480024721369175e-06, "loss": 17.2789, "step": 23564 }, { "epoch": 0.430749264262343, "grad_norm": 6.998121395021827, "learning_rate": 6.347717418875059e-06, "loss": 18.0811, "step": 23565 }, { "epoch": 0.43076754345878954, "grad_norm": 6.394144033047483, "learning_rate": 6.347432360889569e-06, "loss": 17.8028, "step": 23566 }, { "epoch": 0.4307858226552361, "grad_norm": 6.197785823895567, "learning_rate": 6.3471472981814455e-06, "loss": 17.3168, "step": 23567 }, { "epoch": 0.4308041018516826, "grad_norm": 7.082447720214877, "learning_rate": 6.346862230751687e-06, "loss": 18.2345, "step": 23568 }, { "epoch": 0.43082238104812914, "grad_norm": 6.853089912572116, "learning_rate": 6.346577158601295e-06, "loss": 17.658, "step": 23569 }, { "epoch": 0.4308406602445756, "grad_norm": 6.759346800709279, "learning_rate": 6.346292081731263e-06, "loss": 17.6147, "step": 23570 }, { "epoch": 0.43085893944102216, "grad_norm": 5.888435230387981, "learning_rate": 6.346007000142597e-06, "loss": 17.5343, "step": 23571 }, { "epoch": 0.4308772186374687, "grad_norm": 6.506146373043827, "learning_rate": 6.345721913836293e-06, "loss": 17.7227, "step": 23572 }, { "epoch": 0.43089549783391523, "grad_norm": 6.400574972811731, "learning_rate": 6.345436822813351e-06, "loss": 17.5505, "step": 23573 }, { "epoch": 0.43091377703036177, "grad_norm": 6.925905266755376, "learning_rate": 6.345151727074769e-06, "loss": 17.981, "step": 23574 }, { "epoch": 0.43093205622680825, "grad_norm": 6.203663754888339, "learning_rate": 6.344866626621545e-06, "loss": 17.6513, "step": 23575 }, { "epoch": 0.4309503354232548, "grad_norm": 9.25854042975842, "learning_rate": 6.3445815214546835e-06, "loss": 17.6923, "step": 23576 }, { "epoch": 0.4309686146197013, "grad_norm": 5.340424561620956, "learning_rate": 6.34429641157518e-06, "loss": 17.0434, "step": 23577 }, { "epoch": 0.43098689381614785, "grad_norm": 7.223076131341303, "learning_rate": 6.344011296984032e-06, "loss": 17.7422, "step": 23578 }, { "epoch": 0.4310051730125944, "grad_norm": 7.421662114646333, "learning_rate": 6.343726177682242e-06, "loss": 18.237, "step": 23579 }, { "epoch": 0.43102345220904087, "grad_norm": 6.412888405338289, "learning_rate": 6.343441053670809e-06, "loss": 17.768, "step": 23580 }, { "epoch": 0.4310417314054874, "grad_norm": 5.686539067838539, "learning_rate": 6.343155924950731e-06, "loss": 17.561, "step": 23581 }, { "epoch": 0.43106001060193394, "grad_norm": 6.2577164798291305, "learning_rate": 6.3428707915230084e-06, "loss": 17.3655, "step": 23582 }, { "epoch": 0.4310782897983805, "grad_norm": 6.478114533256353, "learning_rate": 6.342585653388641e-06, "loss": 17.7416, "step": 23583 }, { "epoch": 0.431096568994827, "grad_norm": 6.864987302960307, "learning_rate": 6.3423005105486255e-06, "loss": 17.835, "step": 23584 }, { "epoch": 0.4311148481912735, "grad_norm": 7.418753310101508, "learning_rate": 6.342015363003964e-06, "loss": 17.3938, "step": 23585 }, { "epoch": 0.43113312738772, "grad_norm": 6.927264000389076, "learning_rate": 6.341730210755656e-06, "loss": 17.8835, "step": 23586 }, { "epoch": 0.43115140658416656, "grad_norm": 6.540024975991596, "learning_rate": 6.3414450538047e-06, "loss": 17.5499, "step": 23587 }, { "epoch": 0.4311696857806131, "grad_norm": 6.128366513281492, "learning_rate": 6.341159892152094e-06, "loss": 17.327, "step": 23588 }, { "epoch": 0.43118796497705963, "grad_norm": 6.531556646376057, "learning_rate": 6.340874725798839e-06, "loss": 17.3907, "step": 23589 }, { "epoch": 0.4312062441735061, "grad_norm": 5.874498961371777, "learning_rate": 6.340589554745936e-06, "loss": 17.3448, "step": 23590 }, { "epoch": 0.43122452336995265, "grad_norm": 6.398861232659166, "learning_rate": 6.340304378994382e-06, "loss": 17.3126, "step": 23591 }, { "epoch": 0.4312428025663992, "grad_norm": 6.822006102054159, "learning_rate": 6.340019198545177e-06, "loss": 17.5016, "step": 23592 }, { "epoch": 0.4312610817628457, "grad_norm": 5.416352710818658, "learning_rate": 6.339734013399323e-06, "loss": 17.344, "step": 23593 }, { "epoch": 0.43127936095929226, "grad_norm": 5.854315251684952, "learning_rate": 6.339448823557816e-06, "loss": 17.5095, "step": 23594 }, { "epoch": 0.43129764015573874, "grad_norm": 4.862848523644214, "learning_rate": 6.339163629021656e-06, "loss": 16.6493, "step": 23595 }, { "epoch": 0.4313159193521853, "grad_norm": 7.724796247332725, "learning_rate": 6.338878429791846e-06, "loss": 18.062, "step": 23596 }, { "epoch": 0.4313341985486318, "grad_norm": 6.703452027022384, "learning_rate": 6.338593225869382e-06, "loss": 17.9162, "step": 23597 }, { "epoch": 0.43135247774507834, "grad_norm": 6.360853912618733, "learning_rate": 6.338308017255265e-06, "loss": 17.6445, "step": 23598 }, { "epoch": 0.4313707569415248, "grad_norm": 5.146724697823028, "learning_rate": 6.338022803950495e-06, "loss": 16.985, "step": 23599 }, { "epoch": 0.43138903613797136, "grad_norm": 6.493735869695229, "learning_rate": 6.337737585956072e-06, "loss": 17.4252, "step": 23600 }, { "epoch": 0.4314073153344179, "grad_norm": 6.177591743726019, "learning_rate": 6.337452363272994e-06, "loss": 17.499, "step": 23601 }, { "epoch": 0.43142559453086443, "grad_norm": 5.310687994515556, "learning_rate": 6.3371671359022595e-06, "loss": 16.9643, "step": 23602 }, { "epoch": 0.43144387372731097, "grad_norm": 6.164661027840266, "learning_rate": 6.336881903844872e-06, "loss": 17.3908, "step": 23603 }, { "epoch": 0.43146215292375745, "grad_norm": 6.258563344737975, "learning_rate": 6.33659666710183e-06, "loss": 17.2719, "step": 23604 }, { "epoch": 0.431480432120204, "grad_norm": 6.074955619878194, "learning_rate": 6.336311425674132e-06, "loss": 17.3799, "step": 23605 }, { "epoch": 0.4314987113166505, "grad_norm": 6.905842635532751, "learning_rate": 6.336026179562777e-06, "loss": 17.6053, "step": 23606 }, { "epoch": 0.43151699051309705, "grad_norm": 5.6531995983639165, "learning_rate": 6.335740928768769e-06, "loss": 17.2047, "step": 23607 }, { "epoch": 0.4315352697095436, "grad_norm": 6.341681698187891, "learning_rate": 6.335455673293102e-06, "loss": 17.4688, "step": 23608 }, { "epoch": 0.43155354890599007, "grad_norm": 6.6960972846629, "learning_rate": 6.335170413136782e-06, "loss": 17.6311, "step": 23609 }, { "epoch": 0.4315718281024366, "grad_norm": 5.425467928272902, "learning_rate": 6.3348851483008034e-06, "loss": 17.2715, "step": 23610 }, { "epoch": 0.43159010729888314, "grad_norm": 5.786997763853015, "learning_rate": 6.334599878786169e-06, "loss": 17.0508, "step": 23611 }, { "epoch": 0.4316083864953297, "grad_norm": 7.2793864873006315, "learning_rate": 6.334314604593877e-06, "loss": 17.8548, "step": 23612 }, { "epoch": 0.4316266656917762, "grad_norm": 7.507989362870152, "learning_rate": 6.334029325724928e-06, "loss": 17.5372, "step": 23613 }, { "epoch": 0.4316449448882227, "grad_norm": 6.007074653945293, "learning_rate": 6.333744042180324e-06, "loss": 17.4673, "step": 23614 }, { "epoch": 0.43166322408466923, "grad_norm": 7.066863931780278, "learning_rate": 6.3334587539610616e-06, "loss": 17.6358, "step": 23615 }, { "epoch": 0.43168150328111576, "grad_norm": 5.730963674864685, "learning_rate": 6.33317346106814e-06, "loss": 17.2837, "step": 23616 }, { "epoch": 0.4316997824775623, "grad_norm": 6.995700824601071, "learning_rate": 6.3328881635025645e-06, "loss": 17.9608, "step": 23617 }, { "epoch": 0.43171806167400884, "grad_norm": 7.683474690210917, "learning_rate": 6.33260286126533e-06, "loss": 17.9472, "step": 23618 }, { "epoch": 0.4317363408704553, "grad_norm": 5.3997313453108005, "learning_rate": 6.332317554357439e-06, "loss": 17.3005, "step": 23619 }, { "epoch": 0.43175462006690185, "grad_norm": 7.647251536137354, "learning_rate": 6.332032242779888e-06, "loss": 18.0896, "step": 23620 }, { "epoch": 0.4317728992633484, "grad_norm": 6.0922444223845025, "learning_rate": 6.3317469265336825e-06, "loss": 17.2557, "step": 23621 }, { "epoch": 0.4317911784597949, "grad_norm": 6.41205927824415, "learning_rate": 6.331461605619819e-06, "loss": 17.6117, "step": 23622 }, { "epoch": 0.43180945765624146, "grad_norm": 6.974559947009274, "learning_rate": 6.331176280039297e-06, "loss": 17.9107, "step": 23623 }, { "epoch": 0.43182773685268794, "grad_norm": 6.111808477909253, "learning_rate": 6.330890949793118e-06, "loss": 17.6125, "step": 23624 }, { "epoch": 0.4318460160491345, "grad_norm": 6.386831922776062, "learning_rate": 6.330605614882282e-06, "loss": 17.3546, "step": 23625 }, { "epoch": 0.431864295245581, "grad_norm": 5.6752321721912455, "learning_rate": 6.330320275307788e-06, "loss": 17.4309, "step": 23626 }, { "epoch": 0.43188257444202754, "grad_norm": 6.8700104537621645, "learning_rate": 6.3300349310706385e-06, "loss": 17.7857, "step": 23627 }, { "epoch": 0.4319008536384741, "grad_norm": 5.587201181943335, "learning_rate": 6.329749582171831e-06, "loss": 17.4384, "step": 23628 }, { "epoch": 0.43191913283492056, "grad_norm": 7.180760901175978, "learning_rate": 6.329464228612366e-06, "loss": 17.8585, "step": 23629 }, { "epoch": 0.4319374120313671, "grad_norm": 6.486207732412409, "learning_rate": 6.329178870393245e-06, "loss": 17.4598, "step": 23630 }, { "epoch": 0.43195569122781363, "grad_norm": 5.825725309102074, "learning_rate": 6.328893507515469e-06, "loss": 17.2494, "step": 23631 }, { "epoch": 0.43197397042426017, "grad_norm": 6.5699124501434065, "learning_rate": 6.328608139980035e-06, "loss": 17.4834, "step": 23632 }, { "epoch": 0.43199224962070665, "grad_norm": 5.819090964177129, "learning_rate": 6.328322767787944e-06, "loss": 17.1533, "step": 23633 }, { "epoch": 0.4320105288171532, "grad_norm": 5.410143388116245, "learning_rate": 6.328037390940196e-06, "loss": 17.1068, "step": 23634 }, { "epoch": 0.4320288080135997, "grad_norm": 5.6812501974864675, "learning_rate": 6.327752009437795e-06, "loss": 17.2393, "step": 23635 }, { "epoch": 0.43204708721004625, "grad_norm": 5.394884613321816, "learning_rate": 6.327466623281737e-06, "loss": 17.105, "step": 23636 }, { "epoch": 0.4320653664064928, "grad_norm": 6.279848727586088, "learning_rate": 6.3271812324730246e-06, "loss": 17.5912, "step": 23637 }, { "epoch": 0.43208364560293927, "grad_norm": 7.0200001625514865, "learning_rate": 6.326895837012657e-06, "loss": 17.8785, "step": 23638 }, { "epoch": 0.4321019247993858, "grad_norm": 6.6011727904737, "learning_rate": 6.326610436901633e-06, "loss": 17.6225, "step": 23639 }, { "epoch": 0.43212020399583234, "grad_norm": 9.000078204813386, "learning_rate": 6.3263250321409565e-06, "loss": 18.1729, "step": 23640 }, { "epoch": 0.4321384831922789, "grad_norm": 6.494816833204111, "learning_rate": 6.326039622731625e-06, "loss": 17.5466, "step": 23641 }, { "epoch": 0.4321567623887254, "grad_norm": 7.552311891076966, "learning_rate": 6.325754208674639e-06, "loss": 18.2418, "step": 23642 }, { "epoch": 0.4321750415851719, "grad_norm": 4.660648391971336, "learning_rate": 6.325468789971e-06, "loss": 17.0135, "step": 23643 }, { "epoch": 0.43219332078161843, "grad_norm": 7.794188448478198, "learning_rate": 6.325183366621708e-06, "loss": 18.2133, "step": 23644 }, { "epoch": 0.43221159997806496, "grad_norm": 6.958218183860368, "learning_rate": 6.324897938627764e-06, "loss": 17.6694, "step": 23645 }, { "epoch": 0.4322298791745115, "grad_norm": 5.308723604304931, "learning_rate": 6.3246125059901675e-06, "loss": 17.1629, "step": 23646 }, { "epoch": 0.43224815837095804, "grad_norm": 6.898854752428574, "learning_rate": 6.324327068709919e-06, "loss": 17.6634, "step": 23647 }, { "epoch": 0.4322664375674045, "grad_norm": 5.657987460756355, "learning_rate": 6.3240416267880176e-06, "loss": 17.4149, "step": 23648 }, { "epoch": 0.43228471676385105, "grad_norm": 6.514028981464109, "learning_rate": 6.323756180225467e-06, "loss": 17.4828, "step": 23649 }, { "epoch": 0.4323029959602976, "grad_norm": 6.3316402606485065, "learning_rate": 6.323470729023265e-06, "loss": 17.4596, "step": 23650 }, { "epoch": 0.4323212751567441, "grad_norm": 6.1611782907205495, "learning_rate": 6.323185273182414e-06, "loss": 17.7437, "step": 23651 }, { "epoch": 0.43233955435319066, "grad_norm": 6.204323257004265, "learning_rate": 6.322899812703912e-06, "loss": 17.7884, "step": 23652 }, { "epoch": 0.43235783354963714, "grad_norm": 6.031031266139402, "learning_rate": 6.3226143475887615e-06, "loss": 17.5877, "step": 23653 }, { "epoch": 0.4323761127460837, "grad_norm": 6.642578101931615, "learning_rate": 6.322328877837962e-06, "loss": 17.8141, "step": 23654 }, { "epoch": 0.4323943919425302, "grad_norm": 6.232290290975836, "learning_rate": 6.322043403452516e-06, "loss": 17.6875, "step": 23655 }, { "epoch": 0.43241267113897675, "grad_norm": 6.447425512472471, "learning_rate": 6.321757924433423e-06, "loss": 17.7279, "step": 23656 }, { "epoch": 0.4324309503354233, "grad_norm": 5.546854156608528, "learning_rate": 6.32147244078168e-06, "loss": 17.178, "step": 23657 }, { "epoch": 0.43244922953186976, "grad_norm": 7.348328477775804, "learning_rate": 6.321186952498292e-06, "loss": 18.0173, "step": 23658 }, { "epoch": 0.4324675087283163, "grad_norm": 6.889108306857017, "learning_rate": 6.320901459584261e-06, "loss": 17.8659, "step": 23659 }, { "epoch": 0.43248578792476283, "grad_norm": 6.132608183396902, "learning_rate": 6.320615962040582e-06, "loss": 17.4422, "step": 23660 }, { "epoch": 0.43250406712120937, "grad_norm": 6.72385031976017, "learning_rate": 6.32033045986826e-06, "loss": 17.7272, "step": 23661 }, { "epoch": 0.4325223463176559, "grad_norm": 4.912147979234194, "learning_rate": 6.320044953068292e-06, "loss": 16.9572, "step": 23662 }, { "epoch": 0.4325406255141024, "grad_norm": 6.492503014424679, "learning_rate": 6.319759441641684e-06, "loss": 17.6422, "step": 23663 }, { "epoch": 0.4325589047105489, "grad_norm": 6.552009588590618, "learning_rate": 6.319473925589434e-06, "loss": 17.6427, "step": 23664 }, { "epoch": 0.43257718390699545, "grad_norm": 5.223481546395908, "learning_rate": 6.319188404912539e-06, "loss": 16.9528, "step": 23665 }, { "epoch": 0.432595463103442, "grad_norm": 6.261079903315186, "learning_rate": 6.3189028796120064e-06, "loss": 17.7824, "step": 23666 }, { "epoch": 0.43261374229988847, "grad_norm": 7.699282425401338, "learning_rate": 6.318617349688833e-06, "loss": 18.1409, "step": 23667 }, { "epoch": 0.432632021496335, "grad_norm": 6.000838364614353, "learning_rate": 6.3183318151440185e-06, "loss": 17.6232, "step": 23668 }, { "epoch": 0.43265030069278154, "grad_norm": 7.386893603469021, "learning_rate": 6.318046275978568e-06, "loss": 17.883, "step": 23669 }, { "epoch": 0.4326685798892281, "grad_norm": 5.803587478337489, "learning_rate": 6.317760732193476e-06, "loss": 17.1153, "step": 23670 }, { "epoch": 0.4326868590856746, "grad_norm": 5.739835343185507, "learning_rate": 6.317475183789749e-06, "loss": 17.4805, "step": 23671 }, { "epoch": 0.4327051382821211, "grad_norm": 6.590862740531634, "learning_rate": 6.317189630768387e-06, "loss": 17.5096, "step": 23672 }, { "epoch": 0.43272341747856763, "grad_norm": 6.227370853172004, "learning_rate": 6.31690407313039e-06, "loss": 17.3566, "step": 23673 }, { "epoch": 0.43274169667501416, "grad_norm": 7.471506976174249, "learning_rate": 6.316618510876756e-06, "loss": 17.8985, "step": 23674 }, { "epoch": 0.4327599758714607, "grad_norm": 6.930991009726059, "learning_rate": 6.316332944008489e-06, "loss": 17.7353, "step": 23675 }, { "epoch": 0.43277825506790724, "grad_norm": 8.710427864066192, "learning_rate": 6.31604737252659e-06, "loss": 17.9213, "step": 23676 }, { "epoch": 0.4327965342643537, "grad_norm": 7.255891957310312, "learning_rate": 6.315761796432059e-06, "loss": 18.1452, "step": 23677 }, { "epoch": 0.43281481346080025, "grad_norm": 7.331456921649562, "learning_rate": 6.315476215725898e-06, "loss": 18.0464, "step": 23678 }, { "epoch": 0.4328330926572468, "grad_norm": 5.865152578344326, "learning_rate": 6.3151906304091044e-06, "loss": 17.3125, "step": 23679 }, { "epoch": 0.4328513718536933, "grad_norm": 6.2557696130679314, "learning_rate": 6.314905040482684e-06, "loss": 17.2814, "step": 23680 }, { "epoch": 0.43286965105013986, "grad_norm": 6.979713538355563, "learning_rate": 6.314619445947635e-06, "loss": 17.9718, "step": 23681 }, { "epoch": 0.43288793024658634, "grad_norm": 6.949967968444671, "learning_rate": 6.314333846804958e-06, "loss": 17.8274, "step": 23682 }, { "epoch": 0.4329062094430329, "grad_norm": 7.340115865364658, "learning_rate": 6.3140482430556575e-06, "loss": 18.0253, "step": 23683 }, { "epoch": 0.4329244886394794, "grad_norm": 6.784555261414443, "learning_rate": 6.3137626347007285e-06, "loss": 17.7889, "step": 23684 }, { "epoch": 0.43294276783592595, "grad_norm": 7.6079277560130905, "learning_rate": 6.313477021741177e-06, "loss": 18.6101, "step": 23685 }, { "epoch": 0.4329610470323725, "grad_norm": 7.2347278507334245, "learning_rate": 6.313191404178003e-06, "loss": 17.5927, "step": 23686 }, { "epoch": 0.43297932622881896, "grad_norm": 6.8709107838501025, "learning_rate": 6.312905782012208e-06, "loss": 17.9347, "step": 23687 }, { "epoch": 0.4329976054252655, "grad_norm": 6.193055591317017, "learning_rate": 6.312620155244791e-06, "loss": 18.3008, "step": 23688 }, { "epoch": 0.43301588462171203, "grad_norm": 7.119030086685474, "learning_rate": 6.312334523876753e-06, "loss": 17.7394, "step": 23689 }, { "epoch": 0.43303416381815857, "grad_norm": 8.173876144565408, "learning_rate": 6.312048887909098e-06, "loss": 18.3403, "step": 23690 }, { "epoch": 0.4330524430146051, "grad_norm": 4.51053767601419, "learning_rate": 6.311763247342824e-06, "loss": 16.9699, "step": 23691 }, { "epoch": 0.4330707222110516, "grad_norm": 5.552189046937376, "learning_rate": 6.311477602178936e-06, "loss": 17.2013, "step": 23692 }, { "epoch": 0.4330890014074981, "grad_norm": 5.192470755949138, "learning_rate": 6.31119195241843e-06, "loss": 16.9921, "step": 23693 }, { "epoch": 0.43310728060394466, "grad_norm": 5.885915132582066, "learning_rate": 6.310906298062313e-06, "loss": 17.48, "step": 23694 }, { "epoch": 0.4331255598003912, "grad_norm": 6.116542001462024, "learning_rate": 6.310620639111581e-06, "loss": 17.4715, "step": 23695 }, { "epoch": 0.4331438389968377, "grad_norm": 7.1573774379069155, "learning_rate": 6.310334975567238e-06, "loss": 18.1411, "step": 23696 }, { "epoch": 0.4331621181932842, "grad_norm": 6.87937979468646, "learning_rate": 6.310049307430285e-06, "loss": 17.7575, "step": 23697 }, { "epoch": 0.43318039738973074, "grad_norm": 6.805774397697574, "learning_rate": 6.309763634701722e-06, "loss": 17.7559, "step": 23698 }, { "epoch": 0.4331986765861773, "grad_norm": 6.354744043821025, "learning_rate": 6.309477957382551e-06, "loss": 17.8381, "step": 23699 }, { "epoch": 0.4332169557826238, "grad_norm": 6.263618172154704, "learning_rate": 6.309192275473776e-06, "loss": 17.4542, "step": 23700 }, { "epoch": 0.4332352349790703, "grad_norm": 5.9982633904496945, "learning_rate": 6.308906588976393e-06, "loss": 17.245, "step": 23701 }, { "epoch": 0.43325351417551683, "grad_norm": 6.403164628163186, "learning_rate": 6.3086208978914055e-06, "loss": 17.5769, "step": 23702 }, { "epoch": 0.43327179337196337, "grad_norm": 5.2043635896948865, "learning_rate": 6.3083352022198176e-06, "loss": 16.9292, "step": 23703 }, { "epoch": 0.4332900725684099, "grad_norm": 5.8461032049150266, "learning_rate": 6.308049501962628e-06, "loss": 17.3624, "step": 23704 }, { "epoch": 0.43330835176485644, "grad_norm": 6.958185011318118, "learning_rate": 6.3077637971208376e-06, "loss": 17.3023, "step": 23705 }, { "epoch": 0.4333266309613029, "grad_norm": 6.964875055633574, "learning_rate": 6.307478087695448e-06, "loss": 17.7401, "step": 23706 }, { "epoch": 0.43334491015774945, "grad_norm": 7.906333688005333, "learning_rate": 6.307192373687462e-06, "loss": 18.0221, "step": 23707 }, { "epoch": 0.433363189354196, "grad_norm": 6.303503673044512, "learning_rate": 6.3069066550978795e-06, "loss": 17.6923, "step": 23708 }, { "epoch": 0.4333814685506425, "grad_norm": 7.393071072362138, "learning_rate": 6.306620931927702e-06, "loss": 17.565, "step": 23709 }, { "epoch": 0.43339974774708906, "grad_norm": 6.959454472348956, "learning_rate": 6.306335204177933e-06, "loss": 17.7927, "step": 23710 }, { "epoch": 0.43341802694353554, "grad_norm": 6.3942945657447545, "learning_rate": 6.306049471849572e-06, "loss": 17.4261, "step": 23711 }, { "epoch": 0.4334363061399821, "grad_norm": 7.0891884242612795, "learning_rate": 6.305763734943622e-06, "loss": 18.0542, "step": 23712 }, { "epoch": 0.4334545853364286, "grad_norm": 5.988720333390101, "learning_rate": 6.3054779934610825e-06, "loss": 17.3924, "step": 23713 }, { "epoch": 0.43347286453287515, "grad_norm": 8.290340208252422, "learning_rate": 6.305192247402956e-06, "loss": 18.3084, "step": 23714 }, { "epoch": 0.4334911437293217, "grad_norm": 5.889952083587758, "learning_rate": 6.304906496770244e-06, "loss": 17.2681, "step": 23715 }, { "epoch": 0.43350942292576816, "grad_norm": 6.112561906215684, "learning_rate": 6.304620741563946e-06, "loss": 17.4354, "step": 23716 }, { "epoch": 0.4335277021222147, "grad_norm": 5.97949838288502, "learning_rate": 6.304334981785067e-06, "loss": 17.4104, "step": 23717 }, { "epoch": 0.43354598131866123, "grad_norm": 6.254784076973419, "learning_rate": 6.3040492174346095e-06, "loss": 17.7769, "step": 23718 }, { "epoch": 0.43356426051510777, "grad_norm": 4.91749993441896, "learning_rate": 6.303763448513569e-06, "loss": 16.818, "step": 23719 }, { "epoch": 0.4335825397115543, "grad_norm": 6.556698095861072, "learning_rate": 6.303477675022952e-06, "loss": 17.5761, "step": 23720 }, { "epoch": 0.4336008189080008, "grad_norm": 7.580466775949854, "learning_rate": 6.3031918969637595e-06, "loss": 17.9656, "step": 23721 }, { "epoch": 0.4336190981044473, "grad_norm": 7.378613834067667, "learning_rate": 6.302906114336992e-06, "loss": 17.9768, "step": 23722 }, { "epoch": 0.43363737730089386, "grad_norm": 6.573427134354408, "learning_rate": 6.302620327143652e-06, "loss": 17.6758, "step": 23723 }, { "epoch": 0.4336556564973404, "grad_norm": 7.381865920222753, "learning_rate": 6.3023345353847395e-06, "loss": 17.8383, "step": 23724 }, { "epoch": 0.4336739356937869, "grad_norm": 7.178408294525828, "learning_rate": 6.302048739061258e-06, "loss": 18.1773, "step": 23725 }, { "epoch": 0.4336922148902334, "grad_norm": 6.765000874438852, "learning_rate": 6.30176293817421e-06, "loss": 17.7684, "step": 23726 }, { "epoch": 0.43371049408667994, "grad_norm": 7.228686473083321, "learning_rate": 6.301477132724594e-06, "loss": 17.7485, "step": 23727 }, { "epoch": 0.4337287732831265, "grad_norm": 7.387191203986933, "learning_rate": 6.301191322713416e-06, "loss": 17.9596, "step": 23728 }, { "epoch": 0.433747052479573, "grad_norm": 6.741546671682292, "learning_rate": 6.300905508141672e-06, "loss": 17.6733, "step": 23729 }, { "epoch": 0.43376533167601955, "grad_norm": 6.506066753363428, "learning_rate": 6.30061968901037e-06, "loss": 17.6092, "step": 23730 }, { "epoch": 0.43378361087246603, "grad_norm": 6.6999741709423315, "learning_rate": 6.300333865320507e-06, "loss": 17.7757, "step": 23731 }, { "epoch": 0.43380189006891257, "grad_norm": 6.947325471826711, "learning_rate": 6.300048037073089e-06, "loss": 17.9511, "step": 23732 }, { "epoch": 0.4338201692653591, "grad_norm": 5.874680522608939, "learning_rate": 6.299762204269113e-06, "loss": 17.4567, "step": 23733 }, { "epoch": 0.43383844846180564, "grad_norm": 6.361205809933727, "learning_rate": 6.299476366909583e-06, "loss": 17.7777, "step": 23734 }, { "epoch": 0.4338567276582521, "grad_norm": 8.225822282790286, "learning_rate": 6.299190524995503e-06, "loss": 18.1674, "step": 23735 }, { "epoch": 0.43387500685469865, "grad_norm": 6.356940099536296, "learning_rate": 6.298904678527873e-06, "loss": 17.5669, "step": 23736 }, { "epoch": 0.4338932860511452, "grad_norm": 6.480234310564968, "learning_rate": 6.2986188275076945e-06, "loss": 17.5864, "step": 23737 }, { "epoch": 0.4339115652475917, "grad_norm": 6.6292184884954555, "learning_rate": 6.298332971935968e-06, "loss": 17.2966, "step": 23738 }, { "epoch": 0.43392984444403826, "grad_norm": 6.738738912862232, "learning_rate": 6.298047111813699e-06, "loss": 18.0916, "step": 23739 }, { "epoch": 0.43394812364048474, "grad_norm": 7.26931032448823, "learning_rate": 6.297761247141886e-06, "loss": 18.0442, "step": 23740 }, { "epoch": 0.4339664028369313, "grad_norm": 5.247154909447839, "learning_rate": 6.297475377921534e-06, "loss": 16.9678, "step": 23741 }, { "epoch": 0.4339846820333778, "grad_norm": 6.959207063024884, "learning_rate": 6.297189504153642e-06, "loss": 17.497, "step": 23742 }, { "epoch": 0.43400296122982435, "grad_norm": 6.192873902998268, "learning_rate": 6.296903625839214e-06, "loss": 17.2276, "step": 23743 }, { "epoch": 0.4340212404262709, "grad_norm": 6.8783072443713555, "learning_rate": 6.296617742979251e-06, "loss": 17.8566, "step": 23744 }, { "epoch": 0.43403951962271736, "grad_norm": 8.055878202472636, "learning_rate": 6.296331855574757e-06, "loss": 18.0506, "step": 23745 }, { "epoch": 0.4340577988191639, "grad_norm": 6.284688876324532, "learning_rate": 6.29604596362673e-06, "loss": 17.8828, "step": 23746 }, { "epoch": 0.43407607801561043, "grad_norm": 5.41551190715901, "learning_rate": 6.295760067136177e-06, "loss": 17.1019, "step": 23747 }, { "epoch": 0.43409435721205697, "grad_norm": 6.578470511427789, "learning_rate": 6.295474166104093e-06, "loss": 17.4602, "step": 23748 }, { "epoch": 0.4341126364085035, "grad_norm": 7.0332412085414715, "learning_rate": 6.295188260531488e-06, "loss": 17.862, "step": 23749 }, { "epoch": 0.43413091560495, "grad_norm": 6.817839618855633, "learning_rate": 6.294902350419361e-06, "loss": 17.7536, "step": 23750 }, { "epoch": 0.4341491948013965, "grad_norm": 6.1622146518976315, "learning_rate": 6.2946164357687115e-06, "loss": 17.4708, "step": 23751 }, { "epoch": 0.43416747399784306, "grad_norm": 8.014778122356413, "learning_rate": 6.294330516580545e-06, "loss": 18.0563, "step": 23752 }, { "epoch": 0.4341857531942896, "grad_norm": 6.252125107404907, "learning_rate": 6.294044592855861e-06, "loss": 17.3551, "step": 23753 }, { "epoch": 0.4342040323907361, "grad_norm": 5.6135930489602375, "learning_rate": 6.293758664595664e-06, "loss": 17.2825, "step": 23754 }, { "epoch": 0.4342223115871826, "grad_norm": 7.749024116360044, "learning_rate": 6.2934727318009555e-06, "loss": 17.9782, "step": 23755 }, { "epoch": 0.43424059078362914, "grad_norm": 7.256405670159264, "learning_rate": 6.293186794472736e-06, "loss": 18.049, "step": 23756 }, { "epoch": 0.4342588699800757, "grad_norm": 7.390234266504027, "learning_rate": 6.2929008526120106e-06, "loss": 17.8325, "step": 23757 }, { "epoch": 0.4342771491765222, "grad_norm": 6.2910071211717975, "learning_rate": 6.292614906219778e-06, "loss": 17.4764, "step": 23758 }, { "epoch": 0.43429542837296875, "grad_norm": 6.114919999794618, "learning_rate": 6.292328955297046e-06, "loss": 17.6181, "step": 23759 }, { "epoch": 0.43431370756941523, "grad_norm": 5.737394877965056, "learning_rate": 6.292042999844809e-06, "loss": 17.2908, "step": 23760 }, { "epoch": 0.43433198676586177, "grad_norm": 6.138453097840232, "learning_rate": 6.2917570398640746e-06, "loss": 17.6269, "step": 23761 }, { "epoch": 0.4343502659623083, "grad_norm": 6.406815295454056, "learning_rate": 6.291471075355845e-06, "loss": 17.5962, "step": 23762 }, { "epoch": 0.43436854515875484, "grad_norm": 7.716533458941187, "learning_rate": 6.291185106321121e-06, "loss": 18.1808, "step": 23763 }, { "epoch": 0.4343868243552014, "grad_norm": 7.1176732978572765, "learning_rate": 6.290899132760906e-06, "loss": 17.9156, "step": 23764 }, { "epoch": 0.43440510355164785, "grad_norm": 5.961970929296037, "learning_rate": 6.2906131546761996e-06, "loss": 17.4415, "step": 23765 }, { "epoch": 0.4344233827480944, "grad_norm": 5.569594998420012, "learning_rate": 6.290327172068007e-06, "loss": 17.1737, "step": 23766 }, { "epoch": 0.4344416619445409, "grad_norm": 7.591474637343277, "learning_rate": 6.29004118493733e-06, "loss": 18.2131, "step": 23767 }, { "epoch": 0.43445994114098746, "grad_norm": 13.932519950208663, "learning_rate": 6.28975519328517e-06, "loss": 17.9378, "step": 23768 }, { "epoch": 0.43447822033743394, "grad_norm": 7.058536196305738, "learning_rate": 6.289469197112531e-06, "loss": 17.882, "step": 23769 }, { "epoch": 0.4344964995338805, "grad_norm": 6.365668185885612, "learning_rate": 6.2891831964204116e-06, "loss": 17.5266, "step": 23770 }, { "epoch": 0.434514778730327, "grad_norm": 6.198416145919927, "learning_rate": 6.28889719120982e-06, "loss": 17.3559, "step": 23771 }, { "epoch": 0.43453305792677355, "grad_norm": 7.714164527855773, "learning_rate": 6.288611181481754e-06, "loss": 18.0403, "step": 23772 }, { "epoch": 0.4345513371232201, "grad_norm": 6.179672552393671, "learning_rate": 6.288325167237219e-06, "loss": 17.4772, "step": 23773 }, { "epoch": 0.43456961631966656, "grad_norm": 6.9156131398090235, "learning_rate": 6.2880391484772166e-06, "loss": 17.4294, "step": 23774 }, { "epoch": 0.4345878955161131, "grad_norm": 8.725267812372703, "learning_rate": 6.287753125202744e-06, "loss": 18.3998, "step": 23775 }, { "epoch": 0.43460617471255963, "grad_norm": 7.209562219500126, "learning_rate": 6.287467097414815e-06, "loss": 17.7836, "step": 23776 }, { "epoch": 0.43462445390900617, "grad_norm": 6.774363317910752, "learning_rate": 6.287181065114421e-06, "loss": 17.3588, "step": 23777 }, { "epoch": 0.4346427331054527, "grad_norm": 7.268566592381338, "learning_rate": 6.286895028302571e-06, "loss": 17.9112, "step": 23778 }, { "epoch": 0.4346610123018992, "grad_norm": 7.904588477619206, "learning_rate": 6.286608986980265e-06, "loss": 18.0713, "step": 23779 }, { "epoch": 0.4346792914983457, "grad_norm": 6.923864533436848, "learning_rate": 6.2863229411485064e-06, "loss": 18.0276, "step": 23780 }, { "epoch": 0.43469757069479226, "grad_norm": 6.622258261032361, "learning_rate": 6.286036890808297e-06, "loss": 17.4968, "step": 23781 }, { "epoch": 0.4347158498912388, "grad_norm": 6.380302552900819, "learning_rate": 6.28575083596064e-06, "loss": 17.4397, "step": 23782 }, { "epoch": 0.43473412908768533, "grad_norm": 4.829702097401405, "learning_rate": 6.2854647766065395e-06, "loss": 16.9652, "step": 23783 }, { "epoch": 0.4347524082841318, "grad_norm": 6.179167919777229, "learning_rate": 6.2851787127469935e-06, "loss": 17.3528, "step": 23784 }, { "epoch": 0.43477068748057834, "grad_norm": 6.818738640862289, "learning_rate": 6.284892644383009e-06, "loss": 17.8505, "step": 23785 }, { "epoch": 0.4347889666770249, "grad_norm": 6.829626493712859, "learning_rate": 6.284606571515588e-06, "loss": 17.664, "step": 23786 }, { "epoch": 0.4348072458734714, "grad_norm": 6.360443457777423, "learning_rate": 6.284320494145732e-06, "loss": 17.6461, "step": 23787 }, { "epoch": 0.43482552506991795, "grad_norm": 6.773893232326095, "learning_rate": 6.284034412274445e-06, "loss": 17.6809, "step": 23788 }, { "epoch": 0.43484380426636443, "grad_norm": 6.30655400159841, "learning_rate": 6.283748325902726e-06, "loss": 17.5792, "step": 23789 }, { "epoch": 0.43486208346281097, "grad_norm": 11.005317080610869, "learning_rate": 6.283462235031583e-06, "loss": 17.8486, "step": 23790 }, { "epoch": 0.4348803626592575, "grad_norm": 7.58798293626356, "learning_rate": 6.283176139662016e-06, "loss": 18.1948, "step": 23791 }, { "epoch": 0.43489864185570404, "grad_norm": 6.606259008986435, "learning_rate": 6.282890039795027e-06, "loss": 17.6139, "step": 23792 }, { "epoch": 0.4349169210521506, "grad_norm": 7.7573673955048825, "learning_rate": 6.28260393543162e-06, "loss": 17.9213, "step": 23793 }, { "epoch": 0.43493520024859705, "grad_norm": 5.020639260069999, "learning_rate": 6.282317826572799e-06, "loss": 16.9778, "step": 23794 }, { "epoch": 0.4349534794450436, "grad_norm": 5.935445833306718, "learning_rate": 6.282031713219563e-06, "loss": 17.347, "step": 23795 }, { "epoch": 0.4349717586414901, "grad_norm": 6.553196914098264, "learning_rate": 6.281745595372919e-06, "loss": 17.6077, "step": 23796 }, { "epoch": 0.43499003783793666, "grad_norm": 6.682440196137849, "learning_rate": 6.281459473033867e-06, "loss": 17.7438, "step": 23797 }, { "epoch": 0.4350083170343832, "grad_norm": 5.695638352358544, "learning_rate": 6.2811733462034105e-06, "loss": 17.3181, "step": 23798 }, { "epoch": 0.4350265962308297, "grad_norm": 6.105545382268568, "learning_rate": 6.280887214882553e-06, "loss": 17.5573, "step": 23799 }, { "epoch": 0.4350448754272762, "grad_norm": 5.335979083358556, "learning_rate": 6.280601079072298e-06, "loss": 17.1656, "step": 23800 }, { "epoch": 0.43506315462372275, "grad_norm": 5.198904730747608, "learning_rate": 6.2803149387736464e-06, "loss": 17.2512, "step": 23801 }, { "epoch": 0.4350814338201693, "grad_norm": 7.323196392213511, "learning_rate": 6.2800287939876e-06, "loss": 18.1471, "step": 23802 }, { "epoch": 0.43509971301661576, "grad_norm": 8.272485293683951, "learning_rate": 6.279742644715166e-06, "loss": 18.2761, "step": 23803 }, { "epoch": 0.4351179922130623, "grad_norm": 6.603923717364079, "learning_rate": 6.279456490957346e-06, "loss": 17.671, "step": 23804 }, { "epoch": 0.43513627140950883, "grad_norm": 5.854976550362765, "learning_rate": 6.279170332715141e-06, "loss": 17.2637, "step": 23805 }, { "epoch": 0.43515455060595537, "grad_norm": 7.574194909508295, "learning_rate": 6.2788841699895545e-06, "loss": 18.2218, "step": 23806 }, { "epoch": 0.4351728298024019, "grad_norm": 7.174977581973314, "learning_rate": 6.278598002781591e-06, "loss": 17.6843, "step": 23807 }, { "epoch": 0.4351911089988484, "grad_norm": 6.157702531852071, "learning_rate": 6.278311831092251e-06, "loss": 17.5328, "step": 23808 }, { "epoch": 0.4352093881952949, "grad_norm": 6.641045909320454, "learning_rate": 6.278025654922539e-06, "loss": 17.6615, "step": 23809 }, { "epoch": 0.43522766739174146, "grad_norm": 6.868937306954469, "learning_rate": 6.2777394742734585e-06, "loss": 17.6829, "step": 23810 }, { "epoch": 0.435245946588188, "grad_norm": 6.527845149836127, "learning_rate": 6.277453289146013e-06, "loss": 17.4385, "step": 23811 }, { "epoch": 0.43526422578463453, "grad_norm": 6.613864051417975, "learning_rate": 6.277167099541204e-06, "loss": 17.6872, "step": 23812 }, { "epoch": 0.435282504981081, "grad_norm": 6.637978323084171, "learning_rate": 6.276880905460034e-06, "loss": 17.6415, "step": 23813 }, { "epoch": 0.43530078417752754, "grad_norm": 6.945530451501778, "learning_rate": 6.276594706903509e-06, "loss": 17.9313, "step": 23814 }, { "epoch": 0.4353190633739741, "grad_norm": 7.2642954581539145, "learning_rate": 6.276308503872629e-06, "loss": 17.8815, "step": 23815 }, { "epoch": 0.4353373425704206, "grad_norm": 6.325217827060266, "learning_rate": 6.2760222963683985e-06, "loss": 17.6147, "step": 23816 }, { "epoch": 0.43535562176686715, "grad_norm": 5.012434633454674, "learning_rate": 6.2757360843918204e-06, "loss": 17.1863, "step": 23817 }, { "epoch": 0.43537390096331363, "grad_norm": 6.864615915355431, "learning_rate": 6.2754498679438995e-06, "loss": 17.6262, "step": 23818 }, { "epoch": 0.43539218015976017, "grad_norm": 6.14360434336811, "learning_rate": 6.275163647025638e-06, "loss": 17.7933, "step": 23819 }, { "epoch": 0.4354104593562067, "grad_norm": 5.812737460701002, "learning_rate": 6.274877421638036e-06, "loss": 17.4681, "step": 23820 }, { "epoch": 0.43542873855265324, "grad_norm": 5.59308688335235, "learning_rate": 6.2745911917821e-06, "loss": 17.2346, "step": 23821 }, { "epoch": 0.4354470177490998, "grad_norm": 6.675402886746384, "learning_rate": 6.274304957458833e-06, "loss": 17.7082, "step": 23822 }, { "epoch": 0.43546529694554625, "grad_norm": 5.454848945652792, "learning_rate": 6.274018718669237e-06, "loss": 17.4234, "step": 23823 }, { "epoch": 0.4354835761419928, "grad_norm": 6.667585564411321, "learning_rate": 6.273732475414317e-06, "loss": 17.5829, "step": 23824 }, { "epoch": 0.4355018553384393, "grad_norm": 7.194824399986449, "learning_rate": 6.273446227695074e-06, "loss": 17.6354, "step": 23825 }, { "epoch": 0.43552013453488586, "grad_norm": 5.50968680104873, "learning_rate": 6.273159975512514e-06, "loss": 17.2895, "step": 23826 }, { "epoch": 0.4355384137313324, "grad_norm": 5.744403015190538, "learning_rate": 6.272873718867638e-06, "loss": 17.4208, "step": 23827 }, { "epoch": 0.4355566929277789, "grad_norm": 6.447591341865886, "learning_rate": 6.272587457761451e-06, "loss": 17.4899, "step": 23828 }, { "epoch": 0.4355749721242254, "grad_norm": 6.886341073026387, "learning_rate": 6.272301192194952e-06, "loss": 17.7471, "step": 23829 }, { "epoch": 0.43559325132067195, "grad_norm": 6.4278439276045605, "learning_rate": 6.272014922169151e-06, "loss": 17.5109, "step": 23830 }, { "epoch": 0.4356115305171185, "grad_norm": 6.051503140524521, "learning_rate": 6.271728647685047e-06, "loss": 17.3598, "step": 23831 }, { "epoch": 0.435629809713565, "grad_norm": 5.103946563957025, "learning_rate": 6.271442368743645e-06, "loss": 16.9667, "step": 23832 }, { "epoch": 0.4356480889100115, "grad_norm": 6.8230330003778095, "learning_rate": 6.271156085345949e-06, "loss": 17.5054, "step": 23833 }, { "epoch": 0.43566636810645804, "grad_norm": 6.123612548656846, "learning_rate": 6.270869797492958e-06, "loss": 17.6386, "step": 23834 }, { "epoch": 0.43568464730290457, "grad_norm": 6.524032878715726, "learning_rate": 6.270583505185681e-06, "loss": 17.4919, "step": 23835 }, { "epoch": 0.4357029264993511, "grad_norm": 8.047292619036462, "learning_rate": 6.270297208425119e-06, "loss": 18.374, "step": 23836 }, { "epoch": 0.4357212056957976, "grad_norm": 5.43341562292553, "learning_rate": 6.270010907212275e-06, "loss": 17.1471, "step": 23837 }, { "epoch": 0.4357394848922441, "grad_norm": 5.482138443011934, "learning_rate": 6.269724601548152e-06, "loss": 17.303, "step": 23838 }, { "epoch": 0.43575776408869066, "grad_norm": 5.984226886374791, "learning_rate": 6.269438291433756e-06, "loss": 17.2762, "step": 23839 }, { "epoch": 0.4357760432851372, "grad_norm": 7.463098994122314, "learning_rate": 6.269151976870088e-06, "loss": 17.9539, "step": 23840 }, { "epoch": 0.43579432248158373, "grad_norm": 5.454584015390502, "learning_rate": 6.268865657858153e-06, "loss": 17.3301, "step": 23841 }, { "epoch": 0.4358126016780302, "grad_norm": 6.536505420193189, "learning_rate": 6.268579334398954e-06, "loss": 17.5903, "step": 23842 }, { "epoch": 0.43583088087447674, "grad_norm": 7.288094103806399, "learning_rate": 6.268293006493493e-06, "loss": 17.8827, "step": 23843 }, { "epoch": 0.4358491600709233, "grad_norm": 7.773616907676997, "learning_rate": 6.268006674142777e-06, "loss": 17.6444, "step": 23844 }, { "epoch": 0.4358674392673698, "grad_norm": 7.666929283601415, "learning_rate": 6.2677203373478075e-06, "loss": 17.8615, "step": 23845 }, { "epoch": 0.43588571846381635, "grad_norm": 5.472988303768162, "learning_rate": 6.267433996109589e-06, "loss": 17.2007, "step": 23846 }, { "epoch": 0.43590399766026283, "grad_norm": 6.383812497393178, "learning_rate": 6.267147650429122e-06, "loss": 17.7053, "step": 23847 }, { "epoch": 0.43592227685670937, "grad_norm": 5.908331424186044, "learning_rate": 6.266861300307412e-06, "loss": 17.3104, "step": 23848 }, { "epoch": 0.4359405560531559, "grad_norm": 7.414114736635272, "learning_rate": 6.266574945745466e-06, "loss": 17.7896, "step": 23849 }, { "epoch": 0.43595883524960244, "grad_norm": 6.630469700512484, "learning_rate": 6.266288586744283e-06, "loss": 17.8471, "step": 23850 }, { "epoch": 0.435977114446049, "grad_norm": 6.498282735265571, "learning_rate": 6.266002223304869e-06, "loss": 17.6131, "step": 23851 }, { "epoch": 0.43599539364249545, "grad_norm": 5.1313976864739645, "learning_rate": 6.265715855428227e-06, "loss": 17.0579, "step": 23852 }, { "epoch": 0.436013672838942, "grad_norm": 6.155423500859613, "learning_rate": 6.26542948311536e-06, "loss": 17.4741, "step": 23853 }, { "epoch": 0.4360319520353885, "grad_norm": 9.077812198192008, "learning_rate": 6.265143106367273e-06, "loss": 18.3364, "step": 23854 }, { "epoch": 0.43605023123183506, "grad_norm": 6.2349520297556404, "learning_rate": 6.264856725184969e-06, "loss": 17.7364, "step": 23855 }, { "epoch": 0.4360685104282816, "grad_norm": 4.7269954324097725, "learning_rate": 6.264570339569452e-06, "loss": 16.9981, "step": 23856 }, { "epoch": 0.4360867896247281, "grad_norm": 7.295275529568781, "learning_rate": 6.264283949521725e-06, "loss": 17.8293, "step": 23857 }, { "epoch": 0.4361050688211746, "grad_norm": 6.532155770873544, "learning_rate": 6.263997555042793e-06, "loss": 17.7705, "step": 23858 }, { "epoch": 0.43612334801762115, "grad_norm": 8.064467076302083, "learning_rate": 6.263711156133662e-06, "loss": 18.0814, "step": 23859 }, { "epoch": 0.4361416272140677, "grad_norm": 7.302257298135736, "learning_rate": 6.263424752795331e-06, "loss": 17.988, "step": 23860 }, { "epoch": 0.4361599064105142, "grad_norm": 6.190354554040243, "learning_rate": 6.263138345028803e-06, "loss": 17.4445, "step": 23861 }, { "epoch": 0.4361781856069607, "grad_norm": 6.427026740535259, "learning_rate": 6.2628519328350876e-06, "loss": 17.4506, "step": 23862 }, { "epoch": 0.43619646480340724, "grad_norm": 6.372205394798012, "learning_rate": 6.262565516215187e-06, "loss": 17.6951, "step": 23863 }, { "epoch": 0.43621474399985377, "grad_norm": 8.06059915581161, "learning_rate": 6.2622790951701006e-06, "loss": 17.9144, "step": 23864 }, { "epoch": 0.4362330231963003, "grad_norm": 6.949122154778351, "learning_rate": 6.261992669700838e-06, "loss": 17.8636, "step": 23865 }, { "epoch": 0.43625130239274684, "grad_norm": 6.709881056579819, "learning_rate": 6.2617062398084e-06, "loss": 17.7407, "step": 23866 }, { "epoch": 0.4362695815891933, "grad_norm": 5.416059978746071, "learning_rate": 6.26141980549379e-06, "loss": 17.1129, "step": 23867 }, { "epoch": 0.43628786078563986, "grad_norm": 6.833323550308938, "learning_rate": 6.261133366758014e-06, "loss": 17.3961, "step": 23868 }, { "epoch": 0.4363061399820864, "grad_norm": 6.178535823968487, "learning_rate": 6.260846923602076e-06, "loss": 17.0604, "step": 23869 }, { "epoch": 0.43632441917853293, "grad_norm": 5.307022416040542, "learning_rate": 6.2605604760269755e-06, "loss": 17.076, "step": 23870 }, { "epoch": 0.4363426983749794, "grad_norm": 7.152625980662234, "learning_rate": 6.260274024033724e-06, "loss": 17.9072, "step": 23871 }, { "epoch": 0.43636097757142595, "grad_norm": 5.831215618894096, "learning_rate": 6.259987567623318e-06, "loss": 17.2684, "step": 23872 }, { "epoch": 0.4363792567678725, "grad_norm": 7.49625516486939, "learning_rate": 6.2597011067967674e-06, "loss": 18.1041, "step": 23873 }, { "epoch": 0.436397535964319, "grad_norm": 8.868963467919887, "learning_rate": 6.259414641555072e-06, "loss": 18.0595, "step": 23874 }, { "epoch": 0.43641581516076555, "grad_norm": 7.660085715575635, "learning_rate": 6.259128171899238e-06, "loss": 18.0545, "step": 23875 }, { "epoch": 0.43643409435721203, "grad_norm": 5.3908779914879235, "learning_rate": 6.258841697830271e-06, "loss": 17.1993, "step": 23876 }, { "epoch": 0.43645237355365857, "grad_norm": 5.633074670352394, "learning_rate": 6.2585552193491715e-06, "loss": 17.0302, "step": 23877 }, { "epoch": 0.4364706527501051, "grad_norm": 6.96628031102791, "learning_rate": 6.258268736456945e-06, "loss": 17.6736, "step": 23878 }, { "epoch": 0.43648893194655164, "grad_norm": 7.111966315222163, "learning_rate": 6.257982249154596e-06, "loss": 17.6331, "step": 23879 }, { "epoch": 0.4365072111429982, "grad_norm": 6.989383445396583, "learning_rate": 6.257695757443128e-06, "loss": 17.7052, "step": 23880 }, { "epoch": 0.43652549033944466, "grad_norm": 6.529517624966005, "learning_rate": 6.257409261323546e-06, "loss": 17.5059, "step": 23881 }, { "epoch": 0.4365437695358912, "grad_norm": 5.992859896160147, "learning_rate": 6.257122760796853e-06, "loss": 17.1186, "step": 23882 }, { "epoch": 0.4365620487323377, "grad_norm": 7.781229654134481, "learning_rate": 6.256836255864054e-06, "loss": 17.7622, "step": 23883 }, { "epoch": 0.43658032792878426, "grad_norm": 6.4979118999299725, "learning_rate": 6.256549746526154e-06, "loss": 17.6442, "step": 23884 }, { "epoch": 0.4365986071252308, "grad_norm": 8.421286888859038, "learning_rate": 6.2562632327841545e-06, "loss": 17.7855, "step": 23885 }, { "epoch": 0.4366168863216773, "grad_norm": 9.54913063909957, "learning_rate": 6.2559767146390626e-06, "loss": 18.0372, "step": 23886 }, { "epoch": 0.4366351655181238, "grad_norm": 6.965151729683479, "learning_rate": 6.255690192091882e-06, "loss": 17.5536, "step": 23887 }, { "epoch": 0.43665344471457035, "grad_norm": 10.681850626360164, "learning_rate": 6.255403665143615e-06, "loss": 17.6952, "step": 23888 }, { "epoch": 0.4366717239110169, "grad_norm": 5.258765641273252, "learning_rate": 6.255117133795266e-06, "loss": 17.033, "step": 23889 }, { "epoch": 0.4366900031074634, "grad_norm": 6.665458575687888, "learning_rate": 6.254830598047843e-06, "loss": 17.5997, "step": 23890 }, { "epoch": 0.4367082823039099, "grad_norm": 6.035690424981967, "learning_rate": 6.254544057902347e-06, "loss": 17.4735, "step": 23891 }, { "epoch": 0.43672656150035644, "grad_norm": 6.474421956015781, "learning_rate": 6.254257513359781e-06, "loss": 17.5113, "step": 23892 }, { "epoch": 0.43674484069680297, "grad_norm": 8.314695596987976, "learning_rate": 6.253970964421152e-06, "loss": 18.2821, "step": 23893 }, { "epoch": 0.4367631198932495, "grad_norm": 6.256334127436358, "learning_rate": 6.253684411087465e-06, "loss": 17.3606, "step": 23894 }, { "epoch": 0.43678139908969604, "grad_norm": 7.6796115714427895, "learning_rate": 6.253397853359723e-06, "loss": 18.5128, "step": 23895 }, { "epoch": 0.4367996782861425, "grad_norm": 6.795941357264282, "learning_rate": 6.253111291238929e-06, "loss": 17.7538, "step": 23896 }, { "epoch": 0.43681795748258906, "grad_norm": 7.25199643861381, "learning_rate": 6.2528247247260885e-06, "loss": 17.594, "step": 23897 }, { "epoch": 0.4368362366790356, "grad_norm": 6.372996474514302, "learning_rate": 6.252538153822206e-06, "loss": 17.2522, "step": 23898 }, { "epoch": 0.43685451587548213, "grad_norm": 6.026701428622392, "learning_rate": 6.252251578528287e-06, "loss": 17.3368, "step": 23899 }, { "epoch": 0.43687279507192867, "grad_norm": 4.761477883977416, "learning_rate": 6.2519649988453345e-06, "loss": 16.8937, "step": 23900 }, { "epoch": 0.43689107426837515, "grad_norm": 6.334667458315501, "learning_rate": 6.251678414774354e-06, "loss": 17.3993, "step": 23901 }, { "epoch": 0.4369093534648217, "grad_norm": 8.493483046830853, "learning_rate": 6.251391826316348e-06, "loss": 18.5209, "step": 23902 }, { "epoch": 0.4369276326612682, "grad_norm": 5.988722862784205, "learning_rate": 6.2511052334723225e-06, "loss": 17.2655, "step": 23903 }, { "epoch": 0.43694591185771475, "grad_norm": 7.315354498610283, "learning_rate": 6.250818636243283e-06, "loss": 17.8763, "step": 23904 }, { "epoch": 0.43696419105416123, "grad_norm": 6.248714930340277, "learning_rate": 6.250532034630231e-06, "loss": 17.2859, "step": 23905 }, { "epoch": 0.43698247025060777, "grad_norm": 5.514940662980491, "learning_rate": 6.250245428634174e-06, "loss": 17.0152, "step": 23906 }, { "epoch": 0.4370007494470543, "grad_norm": 5.657675618127949, "learning_rate": 6.249958818256115e-06, "loss": 17.3315, "step": 23907 }, { "epoch": 0.43701902864350084, "grad_norm": 6.360635433025995, "learning_rate": 6.249672203497058e-06, "loss": 17.7566, "step": 23908 }, { "epoch": 0.4370373078399474, "grad_norm": 6.008786704469323, "learning_rate": 6.249385584358009e-06, "loss": 17.3999, "step": 23909 }, { "epoch": 0.43705558703639386, "grad_norm": 7.33142682619034, "learning_rate": 6.249098960839972e-06, "loss": 17.7921, "step": 23910 }, { "epoch": 0.4370738662328404, "grad_norm": 6.371861169270015, "learning_rate": 6.248812332943951e-06, "loss": 17.5232, "step": 23911 }, { "epoch": 0.4370921454292869, "grad_norm": 5.869601600415608, "learning_rate": 6.248525700670951e-06, "loss": 17.4915, "step": 23912 }, { "epoch": 0.43711042462573346, "grad_norm": 5.525187376485091, "learning_rate": 6.248239064021977e-06, "loss": 17.2771, "step": 23913 }, { "epoch": 0.43712870382218, "grad_norm": 6.488219510405763, "learning_rate": 6.247952422998035e-06, "loss": 17.3501, "step": 23914 }, { "epoch": 0.4371469830186265, "grad_norm": 5.909557666655518, "learning_rate": 6.247665777600127e-06, "loss": 17.2575, "step": 23915 }, { "epoch": 0.437165262215073, "grad_norm": 5.06380006176535, "learning_rate": 6.247379127829257e-06, "loss": 16.8698, "step": 23916 }, { "epoch": 0.43718354141151955, "grad_norm": 5.905539963323596, "learning_rate": 6.247092473686432e-06, "loss": 17.1606, "step": 23917 }, { "epoch": 0.4372018206079661, "grad_norm": 6.807182451339311, "learning_rate": 6.246805815172659e-06, "loss": 17.4176, "step": 23918 }, { "epoch": 0.4372200998044126, "grad_norm": 6.221123397001821, "learning_rate": 6.246519152288937e-06, "loss": 17.6172, "step": 23919 }, { "epoch": 0.4372383790008591, "grad_norm": 6.212224300995334, "learning_rate": 6.246232485036275e-06, "loss": 17.3482, "step": 23920 }, { "epoch": 0.43725665819730564, "grad_norm": 7.823034045524622, "learning_rate": 6.2459458134156745e-06, "loss": 18.1664, "step": 23921 }, { "epoch": 0.43727493739375217, "grad_norm": 7.033783182189167, "learning_rate": 6.2456591374281435e-06, "loss": 17.8423, "step": 23922 }, { "epoch": 0.4372932165901987, "grad_norm": 5.502804655149759, "learning_rate": 6.245372457074685e-06, "loss": 17.0922, "step": 23923 }, { "epoch": 0.43731149578664524, "grad_norm": 6.576363494862701, "learning_rate": 6.245085772356304e-06, "loss": 17.6938, "step": 23924 }, { "epoch": 0.4373297749830917, "grad_norm": 6.067144789740839, "learning_rate": 6.244799083274004e-06, "loss": 17.3229, "step": 23925 }, { "epoch": 0.43734805417953826, "grad_norm": 6.394717174482385, "learning_rate": 6.244512389828794e-06, "loss": 17.4762, "step": 23926 }, { "epoch": 0.4373663333759848, "grad_norm": 5.516036442517155, "learning_rate": 6.244225692021675e-06, "loss": 17.2713, "step": 23927 }, { "epoch": 0.43738461257243133, "grad_norm": 8.086540536894024, "learning_rate": 6.243938989853653e-06, "loss": 17.7486, "step": 23928 }, { "epoch": 0.43740289176887787, "grad_norm": 5.642024582760483, "learning_rate": 6.2436522833257314e-06, "loss": 17.1427, "step": 23929 }, { "epoch": 0.43742117096532435, "grad_norm": 7.3292519349256535, "learning_rate": 6.2433655724389175e-06, "loss": 17.8424, "step": 23930 }, { "epoch": 0.4374394501617709, "grad_norm": 5.842609832264661, "learning_rate": 6.243078857194215e-06, "loss": 17.1244, "step": 23931 }, { "epoch": 0.4374577293582174, "grad_norm": 8.550158140001454, "learning_rate": 6.24279213759263e-06, "loss": 18.3504, "step": 23932 }, { "epoch": 0.43747600855466395, "grad_norm": 6.4760633544487956, "learning_rate": 6.242505413635166e-06, "loss": 17.5759, "step": 23933 }, { "epoch": 0.4374942877511105, "grad_norm": 6.731350216811516, "learning_rate": 6.242218685322826e-06, "loss": 17.5102, "step": 23934 }, { "epoch": 0.43751256694755697, "grad_norm": 5.788240053547558, "learning_rate": 6.24193195265662e-06, "loss": 17.1085, "step": 23935 }, { "epoch": 0.4375308461440035, "grad_norm": 7.5396417647391445, "learning_rate": 6.24164521563755e-06, "loss": 18.0382, "step": 23936 }, { "epoch": 0.43754912534045004, "grad_norm": 4.642640229587409, "learning_rate": 6.241358474266621e-06, "loss": 16.8847, "step": 23937 }, { "epoch": 0.4375674045368966, "grad_norm": 4.759367897182057, "learning_rate": 6.241071728544837e-06, "loss": 16.7752, "step": 23938 }, { "epoch": 0.43758568373334306, "grad_norm": 6.08158858127298, "learning_rate": 6.240784978473206e-06, "loss": 17.1645, "step": 23939 }, { "epoch": 0.4376039629297896, "grad_norm": 5.450517691313575, "learning_rate": 6.2404982240527305e-06, "loss": 17.2829, "step": 23940 }, { "epoch": 0.4376222421262361, "grad_norm": 7.3670376610069255, "learning_rate": 6.240211465284416e-06, "loss": 18.0665, "step": 23941 }, { "epoch": 0.43764052132268266, "grad_norm": 6.087789442483752, "learning_rate": 6.23992470216927e-06, "loss": 17.3223, "step": 23942 }, { "epoch": 0.4376588005191292, "grad_norm": 7.9833410695256575, "learning_rate": 6.2396379347082925e-06, "loss": 18.3517, "step": 23943 }, { "epoch": 0.4376770797155757, "grad_norm": 5.848633507256055, "learning_rate": 6.239351162902493e-06, "loss": 17.4503, "step": 23944 }, { "epoch": 0.4376953589120222, "grad_norm": 5.796874455548671, "learning_rate": 6.239064386752876e-06, "loss": 17.2623, "step": 23945 }, { "epoch": 0.43771363810846875, "grad_norm": 7.67556505670471, "learning_rate": 6.2387776062604454e-06, "loss": 18.3013, "step": 23946 }, { "epoch": 0.4377319173049153, "grad_norm": 5.207616322576824, "learning_rate": 6.238490821426206e-06, "loss": 16.9911, "step": 23947 }, { "epoch": 0.4377501965013618, "grad_norm": 6.335479282535727, "learning_rate": 6.238204032251163e-06, "loss": 17.1, "step": 23948 }, { "epoch": 0.4377684756978083, "grad_norm": 9.483746020749363, "learning_rate": 6.237917238736325e-06, "loss": 18.4088, "step": 23949 }, { "epoch": 0.43778675489425484, "grad_norm": 6.349133558818311, "learning_rate": 6.237630440882693e-06, "loss": 17.7754, "step": 23950 }, { "epoch": 0.4378050340907014, "grad_norm": 7.326353212586243, "learning_rate": 6.237343638691273e-06, "loss": 17.7348, "step": 23951 }, { "epoch": 0.4378233132871479, "grad_norm": 5.381147450195405, "learning_rate": 6.237056832163072e-06, "loss": 16.8319, "step": 23952 }, { "epoch": 0.43784159248359444, "grad_norm": 7.024108480865005, "learning_rate": 6.236770021299093e-06, "loss": 17.6267, "step": 23953 }, { "epoch": 0.4378598716800409, "grad_norm": 5.617575326960354, "learning_rate": 6.236483206100344e-06, "loss": 17.1481, "step": 23954 }, { "epoch": 0.43787815087648746, "grad_norm": 11.712267076978886, "learning_rate": 6.236196386567828e-06, "loss": 18.2683, "step": 23955 }, { "epoch": 0.437896430072934, "grad_norm": 8.004055374621178, "learning_rate": 6.23590956270255e-06, "loss": 18.5959, "step": 23956 }, { "epoch": 0.43791470926938053, "grad_norm": 6.343183840251039, "learning_rate": 6.2356227345055175e-06, "loss": 17.7577, "step": 23957 }, { "epoch": 0.43793298846582707, "grad_norm": 7.599136735195646, "learning_rate": 6.2353359019777335e-06, "loss": 17.7117, "step": 23958 }, { "epoch": 0.43795126766227355, "grad_norm": 5.9030588525894885, "learning_rate": 6.235049065120207e-06, "loss": 17.3532, "step": 23959 }, { "epoch": 0.4379695468587201, "grad_norm": 8.139557562651724, "learning_rate": 6.2347622239339376e-06, "loss": 17.592, "step": 23960 }, { "epoch": 0.4379878260551666, "grad_norm": 6.3013355629707055, "learning_rate": 6.234475378419934e-06, "loss": 17.6954, "step": 23961 }, { "epoch": 0.43800610525161315, "grad_norm": 5.630351907541568, "learning_rate": 6.234188528579202e-06, "loss": 17.0036, "step": 23962 }, { "epoch": 0.4380243844480597, "grad_norm": 5.78054772288933, "learning_rate": 6.233901674412748e-06, "loss": 17.3421, "step": 23963 }, { "epoch": 0.43804266364450617, "grad_norm": 5.959559681237487, "learning_rate": 6.2336148159215735e-06, "loss": 17.3405, "step": 23964 }, { "epoch": 0.4380609428409527, "grad_norm": 6.7847804685119, "learning_rate": 6.233327953106687e-06, "loss": 17.6707, "step": 23965 }, { "epoch": 0.43807922203739924, "grad_norm": 7.25773349149983, "learning_rate": 6.233041085969092e-06, "loss": 17.8941, "step": 23966 }, { "epoch": 0.4380975012338458, "grad_norm": 7.789015459486722, "learning_rate": 6.232754214509796e-06, "loss": 18.152, "step": 23967 }, { "epoch": 0.4381157804302923, "grad_norm": 5.943583740901636, "learning_rate": 6.232467338729803e-06, "loss": 17.2675, "step": 23968 }, { "epoch": 0.4381340596267388, "grad_norm": 7.1440296579959455, "learning_rate": 6.232180458630119e-06, "loss": 17.6182, "step": 23969 }, { "epoch": 0.43815233882318533, "grad_norm": 6.2490591830604805, "learning_rate": 6.231893574211749e-06, "loss": 17.4543, "step": 23970 }, { "epoch": 0.43817061801963186, "grad_norm": 6.5830397561092004, "learning_rate": 6.231606685475701e-06, "loss": 17.7279, "step": 23971 }, { "epoch": 0.4381888972160784, "grad_norm": 7.437304042880089, "learning_rate": 6.231319792422977e-06, "loss": 17.8313, "step": 23972 }, { "epoch": 0.4382071764125249, "grad_norm": 6.352752017127395, "learning_rate": 6.231032895054584e-06, "loss": 17.6606, "step": 23973 }, { "epoch": 0.4382254556089714, "grad_norm": 5.6922561812502535, "learning_rate": 6.230745993371528e-06, "loss": 17.383, "step": 23974 }, { "epoch": 0.43824373480541795, "grad_norm": 5.680137833276108, "learning_rate": 6.2304590873748115e-06, "loss": 17.2924, "step": 23975 }, { "epoch": 0.4382620140018645, "grad_norm": 6.695873536494616, "learning_rate": 6.230172177065445e-06, "loss": 17.8808, "step": 23976 }, { "epoch": 0.438280293198311, "grad_norm": 6.708773107718349, "learning_rate": 6.229885262444433e-06, "loss": 17.6513, "step": 23977 }, { "epoch": 0.4382985723947575, "grad_norm": 6.676344055924407, "learning_rate": 6.229598343512777e-06, "loss": 17.6905, "step": 23978 }, { "epoch": 0.43831685159120404, "grad_norm": 6.201313962599765, "learning_rate": 6.229311420271488e-06, "loss": 17.547, "step": 23979 }, { "epoch": 0.4383351307876506, "grad_norm": 6.183536383194596, "learning_rate": 6.229024492721567e-06, "loss": 17.4816, "step": 23980 }, { "epoch": 0.4383534099840971, "grad_norm": 5.639793513166326, "learning_rate": 6.228737560864024e-06, "loss": 17.3323, "step": 23981 }, { "epoch": 0.43837168918054364, "grad_norm": 7.02600896370297, "learning_rate": 6.22845062469986e-06, "loss": 17.6568, "step": 23982 }, { "epoch": 0.4383899683769901, "grad_norm": 5.201563695186877, "learning_rate": 6.228163684230084e-06, "loss": 17.1166, "step": 23983 }, { "epoch": 0.43840824757343666, "grad_norm": 6.3069927927281295, "learning_rate": 6.227876739455702e-06, "loss": 17.5435, "step": 23984 }, { "epoch": 0.4384265267698832, "grad_norm": 6.684435791059988, "learning_rate": 6.227589790377717e-06, "loss": 17.5835, "step": 23985 }, { "epoch": 0.43844480596632973, "grad_norm": 6.139683812962959, "learning_rate": 6.2273028369971375e-06, "loss": 17.2079, "step": 23986 }, { "epoch": 0.43846308516277627, "grad_norm": 6.744759916293599, "learning_rate": 6.2270158793149696e-06, "loss": 17.8278, "step": 23987 }, { "epoch": 0.43848136435922275, "grad_norm": 6.103049014534535, "learning_rate": 6.226728917332215e-06, "loss": 17.4248, "step": 23988 }, { "epoch": 0.4384996435556693, "grad_norm": 6.857580617955428, "learning_rate": 6.226441951049882e-06, "loss": 17.6062, "step": 23989 }, { "epoch": 0.4385179227521158, "grad_norm": 8.286891018914536, "learning_rate": 6.226154980468978e-06, "loss": 18.1121, "step": 23990 }, { "epoch": 0.43853620194856235, "grad_norm": 6.281215225087562, "learning_rate": 6.225868005590506e-06, "loss": 17.5584, "step": 23991 }, { "epoch": 0.4385544811450089, "grad_norm": 7.280953450911573, "learning_rate": 6.225581026415473e-06, "loss": 17.7032, "step": 23992 }, { "epoch": 0.43857276034145537, "grad_norm": 6.979399398605442, "learning_rate": 6.225294042944884e-06, "loss": 17.6486, "step": 23993 }, { "epoch": 0.4385910395379019, "grad_norm": 6.638458640357061, "learning_rate": 6.225007055179748e-06, "loss": 17.723, "step": 23994 }, { "epoch": 0.43860931873434844, "grad_norm": 6.057993657988643, "learning_rate": 6.224720063121067e-06, "loss": 17.2632, "step": 23995 }, { "epoch": 0.438627597930795, "grad_norm": 6.21037187523833, "learning_rate": 6.224433066769849e-06, "loss": 17.6976, "step": 23996 }, { "epoch": 0.4386458771272415, "grad_norm": 6.595796145271207, "learning_rate": 6.224146066127099e-06, "loss": 17.8447, "step": 23997 }, { "epoch": 0.438664156323688, "grad_norm": 8.278190147439984, "learning_rate": 6.2238590611938234e-06, "loss": 17.9456, "step": 23998 }, { "epoch": 0.43868243552013453, "grad_norm": 5.362890817208312, "learning_rate": 6.223572051971027e-06, "loss": 17.3774, "step": 23999 }, { "epoch": 0.43870071471658106, "grad_norm": 5.991677612466831, "learning_rate": 6.223285038459719e-06, "loss": 17.4217, "step": 24000 }, { "epoch": 0.4387189939130276, "grad_norm": 6.0251617144174405, "learning_rate": 6.222998020660903e-06, "loss": 17.4952, "step": 24001 }, { "epoch": 0.43873727310947414, "grad_norm": 6.764376032096751, "learning_rate": 6.222710998575583e-06, "loss": 17.6979, "step": 24002 }, { "epoch": 0.4387555523059206, "grad_norm": 5.86926991320708, "learning_rate": 6.222423972204768e-06, "loss": 17.1538, "step": 24003 }, { "epoch": 0.43877383150236715, "grad_norm": 7.762880804805859, "learning_rate": 6.222136941549464e-06, "loss": 17.8571, "step": 24004 }, { "epoch": 0.4387921106988137, "grad_norm": 7.059405127136602, "learning_rate": 6.221849906610674e-06, "loss": 17.3909, "step": 24005 }, { "epoch": 0.4388103898952602, "grad_norm": 6.2189132962688705, "learning_rate": 6.221562867389408e-06, "loss": 17.4762, "step": 24006 }, { "epoch": 0.4388286690917067, "grad_norm": 7.639833437389585, "learning_rate": 6.221275823886669e-06, "loss": 18.0506, "step": 24007 }, { "epoch": 0.43884694828815324, "grad_norm": 5.459067405090136, "learning_rate": 6.220988776103465e-06, "loss": 17.1166, "step": 24008 }, { "epoch": 0.4388652274845998, "grad_norm": 6.609410233825979, "learning_rate": 6.220701724040801e-06, "loss": 17.6673, "step": 24009 }, { "epoch": 0.4388835066810463, "grad_norm": 6.431944664243485, "learning_rate": 6.220414667699682e-06, "loss": 17.5546, "step": 24010 }, { "epoch": 0.43890178587749284, "grad_norm": 6.221324915602247, "learning_rate": 6.220127607081117e-06, "loss": 17.1842, "step": 24011 }, { "epoch": 0.4389200650739393, "grad_norm": 7.07345777971569, "learning_rate": 6.219840542186111e-06, "loss": 17.7193, "step": 24012 }, { "epoch": 0.43893834427038586, "grad_norm": 7.0063190363877865, "learning_rate": 6.219553473015668e-06, "loss": 17.4253, "step": 24013 }, { "epoch": 0.4389566234668324, "grad_norm": 6.2546168603984595, "learning_rate": 6.219266399570798e-06, "loss": 17.2838, "step": 24014 }, { "epoch": 0.43897490266327893, "grad_norm": 6.258635538417954, "learning_rate": 6.218979321852503e-06, "loss": 17.4274, "step": 24015 }, { "epoch": 0.43899318185972547, "grad_norm": 7.169503195365026, "learning_rate": 6.218692239861793e-06, "loss": 17.9354, "step": 24016 }, { "epoch": 0.43901146105617195, "grad_norm": 6.601886440297481, "learning_rate": 6.218405153599671e-06, "loss": 17.5558, "step": 24017 }, { "epoch": 0.4390297402526185, "grad_norm": 6.373022725769502, "learning_rate": 6.218118063067147e-06, "loss": 17.4031, "step": 24018 }, { "epoch": 0.439048019449065, "grad_norm": 6.5719079781718115, "learning_rate": 6.2178309682652235e-06, "loss": 17.7041, "step": 24019 }, { "epoch": 0.43906629864551155, "grad_norm": 7.017985430227371, "learning_rate": 6.2175438691949065e-06, "loss": 17.6017, "step": 24020 }, { "epoch": 0.4390845778419581, "grad_norm": 6.251060702893441, "learning_rate": 6.217256765857207e-06, "loss": 17.2571, "step": 24021 }, { "epoch": 0.43910285703840457, "grad_norm": 6.367375870634299, "learning_rate": 6.216969658253125e-06, "loss": 17.5228, "step": 24022 }, { "epoch": 0.4391211362348511, "grad_norm": 6.377043967704567, "learning_rate": 6.216682546383672e-06, "loss": 17.8246, "step": 24023 }, { "epoch": 0.43913941543129764, "grad_norm": 7.771306717169053, "learning_rate": 6.216395430249852e-06, "loss": 18.4007, "step": 24024 }, { "epoch": 0.4391576946277442, "grad_norm": 6.000973498365739, "learning_rate": 6.216108309852672e-06, "loss": 17.0641, "step": 24025 }, { "epoch": 0.4391759738241907, "grad_norm": 7.008358668382307, "learning_rate": 6.215821185193137e-06, "loss": 17.7032, "step": 24026 }, { "epoch": 0.4391942530206372, "grad_norm": 6.928903847663157, "learning_rate": 6.215534056272254e-06, "loss": 17.4494, "step": 24027 }, { "epoch": 0.43921253221708373, "grad_norm": 5.625535482139369, "learning_rate": 6.215246923091032e-06, "loss": 17.5746, "step": 24028 }, { "epoch": 0.43923081141353026, "grad_norm": 6.9614181128989445, "learning_rate": 6.214959785650472e-06, "loss": 18.0524, "step": 24029 }, { "epoch": 0.4392490906099768, "grad_norm": 7.409461275741407, "learning_rate": 6.214672643951584e-06, "loss": 18.0821, "step": 24030 }, { "epoch": 0.43926736980642334, "grad_norm": 6.029300780715063, "learning_rate": 6.214385497995374e-06, "loss": 17.2179, "step": 24031 }, { "epoch": 0.4392856490028698, "grad_norm": 6.11037002292352, "learning_rate": 6.214098347782849e-06, "loss": 17.4044, "step": 24032 }, { "epoch": 0.43930392819931635, "grad_norm": 5.834711278241937, "learning_rate": 6.213811193315015e-06, "loss": 17.2023, "step": 24033 }, { "epoch": 0.4393222073957629, "grad_norm": 6.200046138205611, "learning_rate": 6.213524034592875e-06, "loss": 17.4785, "step": 24034 }, { "epoch": 0.4393404865922094, "grad_norm": 8.000951197088606, "learning_rate": 6.213236871617442e-06, "loss": 18.1242, "step": 24035 }, { "epoch": 0.43935876578865596, "grad_norm": 5.27557532248393, "learning_rate": 6.212949704389718e-06, "loss": 17.0796, "step": 24036 }, { "epoch": 0.43937704498510244, "grad_norm": 5.58697925314592, "learning_rate": 6.21266253291071e-06, "loss": 17.3713, "step": 24037 }, { "epoch": 0.439395324181549, "grad_norm": 6.952475108885502, "learning_rate": 6.212375357181426e-06, "loss": 17.6035, "step": 24038 }, { "epoch": 0.4394136033779955, "grad_norm": 6.9391452999085015, "learning_rate": 6.21208817720287e-06, "loss": 17.688, "step": 24039 }, { "epoch": 0.43943188257444205, "grad_norm": 6.731781520110059, "learning_rate": 6.211800992976051e-06, "loss": 17.5839, "step": 24040 }, { "epoch": 0.4394501617708885, "grad_norm": 6.403488127370576, "learning_rate": 6.211513804501975e-06, "loss": 17.6752, "step": 24041 }, { "epoch": 0.43946844096733506, "grad_norm": 5.8170076131586255, "learning_rate": 6.211226611781649e-06, "loss": 17.202, "step": 24042 }, { "epoch": 0.4394867201637816, "grad_norm": 6.704862089039464, "learning_rate": 6.2109394148160774e-06, "loss": 17.8916, "step": 24043 }, { "epoch": 0.43950499936022813, "grad_norm": 6.6130829714901695, "learning_rate": 6.210652213606269e-06, "loss": 17.7099, "step": 24044 }, { "epoch": 0.43952327855667467, "grad_norm": 5.804196571886088, "learning_rate": 6.21036500815323e-06, "loss": 17.314, "step": 24045 }, { "epoch": 0.43954155775312115, "grad_norm": 5.870295302515808, "learning_rate": 6.2100777984579655e-06, "loss": 17.4325, "step": 24046 }, { "epoch": 0.4395598369495677, "grad_norm": 5.406245643499944, "learning_rate": 6.209790584521483e-06, "loss": 17.091, "step": 24047 }, { "epoch": 0.4395781161460142, "grad_norm": 5.301351690539633, "learning_rate": 6.20950336634479e-06, "loss": 16.9648, "step": 24048 }, { "epoch": 0.43959639534246076, "grad_norm": 6.142972290332052, "learning_rate": 6.209216143928895e-06, "loss": 17.133, "step": 24049 }, { "epoch": 0.4396146745389073, "grad_norm": 5.772682067366621, "learning_rate": 6.208928917274799e-06, "loss": 17.4193, "step": 24050 }, { "epoch": 0.43963295373535377, "grad_norm": 6.561133445600708, "learning_rate": 6.2086416863835145e-06, "loss": 17.5559, "step": 24051 }, { "epoch": 0.4396512329318003, "grad_norm": 7.225885480856654, "learning_rate": 6.2083544512560434e-06, "loss": 17.5924, "step": 24052 }, { "epoch": 0.43966951212824684, "grad_norm": 6.301709506262397, "learning_rate": 6.208067211893396e-06, "loss": 17.5383, "step": 24053 }, { "epoch": 0.4396877913246934, "grad_norm": 6.2750822658107355, "learning_rate": 6.207779968296578e-06, "loss": 17.3671, "step": 24054 }, { "epoch": 0.4397060705211399, "grad_norm": 5.6883360896097, "learning_rate": 6.207492720466596e-06, "loss": 17.2641, "step": 24055 }, { "epoch": 0.4397243497175864, "grad_norm": 5.160428857090913, "learning_rate": 6.207205468404457e-06, "loss": 17.0549, "step": 24056 }, { "epoch": 0.43974262891403293, "grad_norm": 6.177780461799774, "learning_rate": 6.206918212111167e-06, "loss": 17.3782, "step": 24057 }, { "epoch": 0.43976090811047946, "grad_norm": 6.555941508063238, "learning_rate": 6.2066309515877334e-06, "loss": 17.5858, "step": 24058 }, { "epoch": 0.439779187306926, "grad_norm": 7.982207387034825, "learning_rate": 6.206343686835165e-06, "loss": 18.1021, "step": 24059 }, { "epoch": 0.43979746650337254, "grad_norm": 6.827920589372808, "learning_rate": 6.206056417854464e-06, "loss": 17.5228, "step": 24060 }, { "epoch": 0.439815745699819, "grad_norm": 5.243004926423027, "learning_rate": 6.205769144646641e-06, "loss": 17.1173, "step": 24061 }, { "epoch": 0.43983402489626555, "grad_norm": 6.091493836617262, "learning_rate": 6.205481867212701e-06, "loss": 17.4518, "step": 24062 }, { "epoch": 0.4398523040927121, "grad_norm": 5.630099018895974, "learning_rate": 6.205194585553653e-06, "loss": 17.106, "step": 24063 }, { "epoch": 0.4398705832891586, "grad_norm": 5.488712327693138, "learning_rate": 6.204907299670502e-06, "loss": 17.0807, "step": 24064 }, { "epoch": 0.43988886248560516, "grad_norm": 8.329318893114538, "learning_rate": 6.204620009564255e-06, "loss": 18.4138, "step": 24065 }, { "epoch": 0.43990714168205164, "grad_norm": 6.201264196117154, "learning_rate": 6.20433271523592e-06, "loss": 17.3556, "step": 24066 }, { "epoch": 0.4399254208784982, "grad_norm": 7.803245184763667, "learning_rate": 6.204045416686503e-06, "loss": 18.0564, "step": 24067 }, { "epoch": 0.4399437000749447, "grad_norm": 5.848519091575874, "learning_rate": 6.203758113917011e-06, "loss": 17.3502, "step": 24068 }, { "epoch": 0.43996197927139125, "grad_norm": 5.450613093420574, "learning_rate": 6.2034708069284525e-06, "loss": 17.0774, "step": 24069 }, { "epoch": 0.4399802584678378, "grad_norm": 7.0861272134943984, "learning_rate": 6.2031834957218314e-06, "loss": 17.7915, "step": 24070 }, { "epoch": 0.43999853766428426, "grad_norm": 6.2486276449081695, "learning_rate": 6.202896180298158e-06, "loss": 17.4001, "step": 24071 }, { "epoch": 0.4400168168607308, "grad_norm": 5.976318337781716, "learning_rate": 6.202608860658438e-06, "loss": 17.5905, "step": 24072 }, { "epoch": 0.44003509605717733, "grad_norm": 4.9373641132000525, "learning_rate": 6.2023215368036785e-06, "loss": 16.9522, "step": 24073 }, { "epoch": 0.44005337525362387, "grad_norm": 6.556857284850879, "learning_rate": 6.2020342087348854e-06, "loss": 17.6768, "step": 24074 }, { "epoch": 0.44007165445007035, "grad_norm": 7.061485093126709, "learning_rate": 6.201746876453066e-06, "loss": 18.1163, "step": 24075 }, { "epoch": 0.4400899336465169, "grad_norm": 6.027018540386245, "learning_rate": 6.201459539959229e-06, "loss": 17.4606, "step": 24076 }, { "epoch": 0.4401082128429634, "grad_norm": 6.039365159304191, "learning_rate": 6.2011721992543814e-06, "loss": 17.695, "step": 24077 }, { "epoch": 0.44012649203940996, "grad_norm": 7.1681031907290125, "learning_rate": 6.200884854339529e-06, "loss": 17.8596, "step": 24078 }, { "epoch": 0.4401447712358565, "grad_norm": 6.082145313149376, "learning_rate": 6.2005975052156784e-06, "loss": 17.4135, "step": 24079 }, { "epoch": 0.44016305043230297, "grad_norm": 6.230506107679308, "learning_rate": 6.200310151883838e-06, "loss": 17.5036, "step": 24080 }, { "epoch": 0.4401813296287495, "grad_norm": 5.464460759241678, "learning_rate": 6.200022794345015e-06, "loss": 17.2063, "step": 24081 }, { "epoch": 0.44019960882519604, "grad_norm": 6.622869756721586, "learning_rate": 6.199735432600216e-06, "loss": 17.5457, "step": 24082 }, { "epoch": 0.4402178880216426, "grad_norm": 7.554716121718214, "learning_rate": 6.1994480666504484e-06, "loss": 17.9225, "step": 24083 }, { "epoch": 0.4402361672180891, "grad_norm": 5.31264702707009, "learning_rate": 6.19916069649672e-06, "loss": 17.0408, "step": 24084 }, { "epoch": 0.4402544464145356, "grad_norm": 5.89945498771339, "learning_rate": 6.198873322140038e-06, "loss": 17.3624, "step": 24085 }, { "epoch": 0.44027272561098213, "grad_norm": 7.644977464635734, "learning_rate": 6.198585943581407e-06, "loss": 17.8964, "step": 24086 }, { "epoch": 0.44029100480742867, "grad_norm": 6.783231163989308, "learning_rate": 6.198298560821838e-06, "loss": 17.6885, "step": 24087 }, { "epoch": 0.4403092840038752, "grad_norm": 5.355605410047858, "learning_rate": 6.198011173862335e-06, "loss": 17.2444, "step": 24088 }, { "epoch": 0.44032756320032174, "grad_norm": 5.804702837279948, "learning_rate": 6.197723782703908e-06, "loss": 17.1333, "step": 24089 }, { "epoch": 0.4403458423967682, "grad_norm": 7.146224777024456, "learning_rate": 6.197436387347564e-06, "loss": 17.8382, "step": 24090 }, { "epoch": 0.44036412159321475, "grad_norm": 7.206326932353275, "learning_rate": 6.197148987794308e-06, "loss": 17.8005, "step": 24091 }, { "epoch": 0.4403824007896613, "grad_norm": 6.633553428716376, "learning_rate": 6.196861584045149e-06, "loss": 17.7012, "step": 24092 }, { "epoch": 0.4404006799861078, "grad_norm": 6.150559907538199, "learning_rate": 6.196574176101093e-06, "loss": 17.3696, "step": 24093 }, { "epoch": 0.44041895918255436, "grad_norm": 5.471955820876253, "learning_rate": 6.19628676396315e-06, "loss": 17.1405, "step": 24094 }, { "epoch": 0.44043723837900084, "grad_norm": 7.408212062568318, "learning_rate": 6.195999347632324e-06, "loss": 17.8337, "step": 24095 }, { "epoch": 0.4404555175754474, "grad_norm": 6.785862576964398, "learning_rate": 6.195711927109626e-06, "loss": 17.7062, "step": 24096 }, { "epoch": 0.4404737967718939, "grad_norm": 6.0664998422025045, "learning_rate": 6.19542450239606e-06, "loss": 17.7203, "step": 24097 }, { "epoch": 0.44049207596834045, "grad_norm": 6.928755566027269, "learning_rate": 6.1951370734926355e-06, "loss": 17.978, "step": 24098 }, { "epoch": 0.440510355164787, "grad_norm": 7.80758531994904, "learning_rate": 6.194849640400359e-06, "loss": 17.9731, "step": 24099 }, { "epoch": 0.44052863436123346, "grad_norm": 6.613035027127089, "learning_rate": 6.194562203120238e-06, "loss": 17.6502, "step": 24100 }, { "epoch": 0.44054691355768, "grad_norm": 7.56195043778486, "learning_rate": 6.194274761653281e-06, "loss": 18.3339, "step": 24101 }, { "epoch": 0.44056519275412653, "grad_norm": 6.171567868539247, "learning_rate": 6.1939873160004935e-06, "loss": 17.5275, "step": 24102 }, { "epoch": 0.44058347195057307, "grad_norm": 8.35353216457608, "learning_rate": 6.193699866162884e-06, "loss": 17.7445, "step": 24103 }, { "epoch": 0.4406017511470196, "grad_norm": 7.110938906108972, "learning_rate": 6.193412412141462e-06, "loss": 17.7643, "step": 24104 }, { "epoch": 0.4406200303434661, "grad_norm": 6.212587681886543, "learning_rate": 6.193124953937232e-06, "loss": 17.2705, "step": 24105 }, { "epoch": 0.4406383095399126, "grad_norm": 8.214394566138274, "learning_rate": 6.1928374915512024e-06, "loss": 17.8221, "step": 24106 }, { "epoch": 0.44065658873635916, "grad_norm": 5.751694848503822, "learning_rate": 6.192550024984381e-06, "loss": 17.581, "step": 24107 }, { "epoch": 0.4406748679328057, "grad_norm": 5.798670043369045, "learning_rate": 6.192262554237774e-06, "loss": 17.3722, "step": 24108 }, { "epoch": 0.44069314712925217, "grad_norm": 5.719614869280944, "learning_rate": 6.191975079312391e-06, "loss": 17.3738, "step": 24109 }, { "epoch": 0.4407114263256987, "grad_norm": 6.068344991425805, "learning_rate": 6.1916876002092394e-06, "loss": 17.2393, "step": 24110 }, { "epoch": 0.44072970552214524, "grad_norm": 5.9317090430344095, "learning_rate": 6.191400116929326e-06, "loss": 17.3586, "step": 24111 }, { "epoch": 0.4407479847185918, "grad_norm": 8.087094429335183, "learning_rate": 6.191112629473658e-06, "loss": 17.7309, "step": 24112 }, { "epoch": 0.4407662639150383, "grad_norm": 5.378131859592571, "learning_rate": 6.1908251378432434e-06, "loss": 17.1388, "step": 24113 }, { "epoch": 0.4407845431114848, "grad_norm": 6.349659146169226, "learning_rate": 6.190537642039092e-06, "loss": 17.5838, "step": 24114 }, { "epoch": 0.44080282230793133, "grad_norm": 6.523370246758639, "learning_rate": 6.1902501420622066e-06, "loss": 17.6528, "step": 24115 }, { "epoch": 0.44082110150437787, "grad_norm": 7.487310635175428, "learning_rate": 6.1899626379135995e-06, "loss": 17.5962, "step": 24116 }, { "epoch": 0.4408393807008244, "grad_norm": 7.361289340010666, "learning_rate": 6.189675129594276e-06, "loss": 18.0879, "step": 24117 }, { "epoch": 0.44085765989727094, "grad_norm": 7.610033682556638, "learning_rate": 6.189387617105246e-06, "loss": 17.7131, "step": 24118 }, { "epoch": 0.4408759390937174, "grad_norm": 5.350356172600791, "learning_rate": 6.1891001004475135e-06, "loss": 17.0352, "step": 24119 }, { "epoch": 0.44089421829016395, "grad_norm": 7.939890278819534, "learning_rate": 6.188812579622089e-06, "loss": 17.9314, "step": 24120 }, { "epoch": 0.4409124974866105, "grad_norm": 6.983534295055088, "learning_rate": 6.1885250546299805e-06, "loss": 17.6833, "step": 24121 }, { "epoch": 0.440930776683057, "grad_norm": 6.748967627266763, "learning_rate": 6.188237525472194e-06, "loss": 17.6455, "step": 24122 }, { "epoch": 0.44094905587950356, "grad_norm": 7.317423161030537, "learning_rate": 6.187949992149737e-06, "loss": 18.1427, "step": 24123 }, { "epoch": 0.44096733507595004, "grad_norm": 7.7601246543196645, "learning_rate": 6.18766245466362e-06, "loss": 17.9725, "step": 24124 }, { "epoch": 0.4409856142723966, "grad_norm": 6.938177549182238, "learning_rate": 6.187374913014849e-06, "loss": 17.5571, "step": 24125 }, { "epoch": 0.4410038934688431, "grad_norm": 6.565980904736275, "learning_rate": 6.187087367204431e-06, "loss": 17.5904, "step": 24126 }, { "epoch": 0.44102217266528965, "grad_norm": 7.039811303293514, "learning_rate": 6.186799817233376e-06, "loss": 17.7007, "step": 24127 }, { "epoch": 0.4410404518617362, "grad_norm": 6.225445666144468, "learning_rate": 6.186512263102691e-06, "loss": 17.3729, "step": 24128 }, { "epoch": 0.44105873105818266, "grad_norm": 4.526645758632178, "learning_rate": 6.18622470481338e-06, "loss": 16.7223, "step": 24129 }, { "epoch": 0.4410770102546292, "grad_norm": 5.020622148618269, "learning_rate": 6.1859371423664576e-06, "loss": 16.7954, "step": 24130 }, { "epoch": 0.44109528945107573, "grad_norm": 7.681872407141381, "learning_rate": 6.185649575762927e-06, "loss": 18.0012, "step": 24131 }, { "epoch": 0.44111356864752227, "grad_norm": 6.0825591877760985, "learning_rate": 6.1853620050038e-06, "loss": 17.4602, "step": 24132 }, { "epoch": 0.4411318478439688, "grad_norm": 7.331425433942787, "learning_rate": 6.18507443009008e-06, "loss": 17.8376, "step": 24133 }, { "epoch": 0.4411501270404153, "grad_norm": 7.5145986448707145, "learning_rate": 6.184786851022776e-06, "loss": 17.823, "step": 24134 }, { "epoch": 0.4411684062368618, "grad_norm": 8.883725417509767, "learning_rate": 6.184499267802899e-06, "loss": 18.034, "step": 24135 }, { "epoch": 0.44118668543330836, "grad_norm": 5.554931324624878, "learning_rate": 6.184211680431453e-06, "loss": 16.9853, "step": 24136 }, { "epoch": 0.4412049646297549, "grad_norm": 6.7366515615716205, "learning_rate": 6.1839240889094494e-06, "loss": 17.6005, "step": 24137 }, { "epoch": 0.44122324382620143, "grad_norm": 7.613902781998414, "learning_rate": 6.183636493237895e-06, "loss": 18.2018, "step": 24138 }, { "epoch": 0.4412415230226479, "grad_norm": 6.248567696616156, "learning_rate": 6.1833488934177956e-06, "loss": 17.35, "step": 24139 }, { "epoch": 0.44125980221909444, "grad_norm": 6.427620493390211, "learning_rate": 6.183061289450162e-06, "loss": 17.4878, "step": 24140 }, { "epoch": 0.441278081415541, "grad_norm": 6.949019104185665, "learning_rate": 6.182773681336e-06, "loss": 17.7226, "step": 24141 }, { "epoch": 0.4412963606119875, "grad_norm": 6.536886009697157, "learning_rate": 6.18248606907632e-06, "loss": 17.4815, "step": 24142 }, { "epoch": 0.441314639808434, "grad_norm": 6.913409699567346, "learning_rate": 6.182198452672129e-06, "loss": 17.379, "step": 24143 }, { "epoch": 0.44133291900488053, "grad_norm": 6.160386367775416, "learning_rate": 6.181910832124435e-06, "loss": 17.5737, "step": 24144 }, { "epoch": 0.44135119820132707, "grad_norm": 6.536423006612148, "learning_rate": 6.181623207434246e-06, "loss": 17.535, "step": 24145 }, { "epoch": 0.4413694773977736, "grad_norm": 4.932389707799513, "learning_rate": 6.1813355786025705e-06, "loss": 16.9306, "step": 24146 }, { "epoch": 0.44138775659422014, "grad_norm": 6.3232867337010665, "learning_rate": 6.181047945630415e-06, "loss": 17.3533, "step": 24147 }, { "epoch": 0.4414060357906666, "grad_norm": 6.136195358541279, "learning_rate": 6.18076030851879e-06, "loss": 17.2286, "step": 24148 }, { "epoch": 0.44142431498711315, "grad_norm": 6.138399060310465, "learning_rate": 6.180472667268703e-06, "loss": 17.3879, "step": 24149 }, { "epoch": 0.4414425941835597, "grad_norm": 4.486653669905141, "learning_rate": 6.180185021881161e-06, "loss": 16.8849, "step": 24150 }, { "epoch": 0.4414608733800062, "grad_norm": 5.733280473822919, "learning_rate": 6.179897372357173e-06, "loss": 17.1547, "step": 24151 }, { "epoch": 0.44147915257645276, "grad_norm": 5.648891036525072, "learning_rate": 6.179609718697748e-06, "loss": 17.3554, "step": 24152 }, { "epoch": 0.44149743177289924, "grad_norm": 7.292753873653095, "learning_rate": 6.179322060903892e-06, "loss": 17.5932, "step": 24153 }, { "epoch": 0.4415157109693458, "grad_norm": 6.400696449672667, "learning_rate": 6.1790343989766155e-06, "loss": 17.6274, "step": 24154 }, { "epoch": 0.4415339901657923, "grad_norm": 6.882805693823392, "learning_rate": 6.1787467329169245e-06, "loss": 17.4463, "step": 24155 }, { "epoch": 0.44155226936223885, "grad_norm": 6.98579812805766, "learning_rate": 6.178459062725829e-06, "loss": 17.4662, "step": 24156 }, { "epoch": 0.4415705485586854, "grad_norm": 5.993690162660362, "learning_rate": 6.178171388404337e-06, "loss": 17.3647, "step": 24157 }, { "epoch": 0.44158882775513186, "grad_norm": 5.645586666406506, "learning_rate": 6.177883709953457e-06, "loss": 17.2167, "step": 24158 }, { "epoch": 0.4416071069515784, "grad_norm": 6.216929702676079, "learning_rate": 6.177596027374197e-06, "loss": 17.6003, "step": 24159 }, { "epoch": 0.44162538614802493, "grad_norm": 8.872753679417796, "learning_rate": 6.177308340667565e-06, "loss": 18.4974, "step": 24160 }, { "epoch": 0.44164366534447147, "grad_norm": 7.430306716374661, "learning_rate": 6.177020649834567e-06, "loss": 18.0911, "step": 24161 }, { "epoch": 0.441661944540918, "grad_norm": 7.063357979590903, "learning_rate": 6.176732954876215e-06, "loss": 17.4794, "step": 24162 }, { "epoch": 0.4416802237373645, "grad_norm": 5.1590089963925445, "learning_rate": 6.1764452557935185e-06, "loss": 16.9837, "step": 24163 }, { "epoch": 0.441698502933811, "grad_norm": 6.164279204045742, "learning_rate": 6.176157552587481e-06, "loss": 17.3539, "step": 24164 }, { "epoch": 0.44171678213025756, "grad_norm": 9.5639257842599, "learning_rate": 6.175869845259115e-06, "loss": 18.3954, "step": 24165 }, { "epoch": 0.4417350613267041, "grad_norm": 5.901771352074644, "learning_rate": 6.175582133809426e-06, "loss": 17.3821, "step": 24166 }, { "epoch": 0.44175334052315063, "grad_norm": 7.029474309152619, "learning_rate": 6.175294418239424e-06, "loss": 17.1903, "step": 24167 }, { "epoch": 0.4417716197195971, "grad_norm": 7.983708765747276, "learning_rate": 6.175006698550117e-06, "loss": 17.8349, "step": 24168 }, { "epoch": 0.44178989891604364, "grad_norm": 5.763698744975223, "learning_rate": 6.174718974742513e-06, "loss": 17.1935, "step": 24169 }, { "epoch": 0.4418081781124902, "grad_norm": 6.64625075210769, "learning_rate": 6.174431246817621e-06, "loss": 17.4247, "step": 24170 }, { "epoch": 0.4418264573089367, "grad_norm": 6.923610002131327, "learning_rate": 6.17414351477645e-06, "loss": 17.6074, "step": 24171 }, { "epoch": 0.44184473650538325, "grad_norm": 8.948947891851802, "learning_rate": 6.173855778620007e-06, "loss": 18.4662, "step": 24172 }, { "epoch": 0.44186301570182973, "grad_norm": 7.043904495888951, "learning_rate": 6.173568038349304e-06, "loss": 17.8628, "step": 24173 }, { "epoch": 0.44188129489827627, "grad_norm": 5.500254369039936, "learning_rate": 6.173280293965343e-06, "loss": 16.8723, "step": 24174 }, { "epoch": 0.4418995740947228, "grad_norm": 6.490909643950598, "learning_rate": 6.172992545469139e-06, "loss": 17.4127, "step": 24175 }, { "epoch": 0.44191785329116934, "grad_norm": 5.883672564127929, "learning_rate": 6.172704792861698e-06, "loss": 17.1861, "step": 24176 }, { "epoch": 0.4419361324876158, "grad_norm": 6.70316592529986, "learning_rate": 6.172417036144027e-06, "loss": 17.4034, "step": 24177 }, { "epoch": 0.44195441168406235, "grad_norm": 6.224850651602484, "learning_rate": 6.172129275317137e-06, "loss": 17.2891, "step": 24178 }, { "epoch": 0.4419726908805089, "grad_norm": 7.706510807772626, "learning_rate": 6.171841510382034e-06, "loss": 18.1552, "step": 24179 }, { "epoch": 0.4419909700769554, "grad_norm": 6.183663533753914, "learning_rate": 6.17155374133973e-06, "loss": 17.4231, "step": 24180 }, { "epoch": 0.44200924927340196, "grad_norm": 7.01547013991577, "learning_rate": 6.171265968191231e-06, "loss": 17.8055, "step": 24181 }, { "epoch": 0.44202752846984844, "grad_norm": 7.553437793710265, "learning_rate": 6.170978190937547e-06, "loss": 18.0711, "step": 24182 }, { "epoch": 0.442045807666295, "grad_norm": 6.4266653424142115, "learning_rate": 6.170690409579685e-06, "loss": 17.5941, "step": 24183 }, { "epoch": 0.4420640868627415, "grad_norm": 5.947258359679659, "learning_rate": 6.170402624118655e-06, "loss": 17.1631, "step": 24184 }, { "epoch": 0.44208236605918805, "grad_norm": 5.5195636614023575, "learning_rate": 6.170114834555466e-06, "loss": 17.259, "step": 24185 }, { "epoch": 0.4421006452556346, "grad_norm": 7.218084105817804, "learning_rate": 6.1698270408911266e-06, "loss": 17.7056, "step": 24186 }, { "epoch": 0.44211892445208106, "grad_norm": 6.099651480618411, "learning_rate": 6.169539243126644e-06, "loss": 17.5093, "step": 24187 }, { "epoch": 0.4421372036485276, "grad_norm": 6.663853645288097, "learning_rate": 6.169251441263028e-06, "loss": 17.769, "step": 24188 }, { "epoch": 0.44215548284497413, "grad_norm": 5.849334981384076, "learning_rate": 6.168963635301287e-06, "loss": 17.1934, "step": 24189 }, { "epoch": 0.44217376204142067, "grad_norm": 5.993995202711705, "learning_rate": 6.168675825242431e-06, "loss": 16.9622, "step": 24190 }, { "epoch": 0.4421920412378672, "grad_norm": 5.630983754426648, "learning_rate": 6.168388011087466e-06, "loss": 17.2036, "step": 24191 }, { "epoch": 0.4422103204343137, "grad_norm": 7.195487849986069, "learning_rate": 6.168100192837403e-06, "loss": 18.0233, "step": 24192 }, { "epoch": 0.4422285996307602, "grad_norm": 7.121234997380964, "learning_rate": 6.167812370493249e-06, "loss": 17.7001, "step": 24193 }, { "epoch": 0.44224687882720676, "grad_norm": 7.362998605201447, "learning_rate": 6.167524544056018e-06, "loss": 18.134, "step": 24194 }, { "epoch": 0.4422651580236533, "grad_norm": 7.772638600808954, "learning_rate": 6.167236713526711e-06, "loss": 17.8615, "step": 24195 }, { "epoch": 0.44228343722009983, "grad_norm": 7.129707872885745, "learning_rate": 6.166948878906341e-06, "loss": 17.7359, "step": 24196 }, { "epoch": 0.4423017164165463, "grad_norm": 5.58093023938167, "learning_rate": 6.166661040195917e-06, "loss": 17.0113, "step": 24197 }, { "epoch": 0.44231999561299284, "grad_norm": 5.2716662428945185, "learning_rate": 6.166373197396448e-06, "loss": 17.0409, "step": 24198 }, { "epoch": 0.4423382748094394, "grad_norm": 6.236117649285141, "learning_rate": 6.166085350508941e-06, "loss": 17.4686, "step": 24199 }, { "epoch": 0.4423565540058859, "grad_norm": 6.92155220381434, "learning_rate": 6.165797499534407e-06, "loss": 17.9482, "step": 24200 }, { "epoch": 0.44237483320233245, "grad_norm": 6.806176269382211, "learning_rate": 6.165509644473855e-06, "loss": 18.0436, "step": 24201 }, { "epoch": 0.44239311239877893, "grad_norm": 6.325650007245598, "learning_rate": 6.165221785328289e-06, "loss": 17.4151, "step": 24202 }, { "epoch": 0.44241139159522547, "grad_norm": 5.469717470043323, "learning_rate": 6.164933922098725e-06, "loss": 17.1739, "step": 24203 }, { "epoch": 0.442429670791672, "grad_norm": 7.086379925307295, "learning_rate": 6.164646054786168e-06, "loss": 17.9885, "step": 24204 }, { "epoch": 0.44244794998811854, "grad_norm": 6.558634207594864, "learning_rate": 6.164358183391628e-06, "loss": 17.3538, "step": 24205 }, { "epoch": 0.4424662291845651, "grad_norm": 9.546454805546572, "learning_rate": 6.164070307916113e-06, "loss": 18.1258, "step": 24206 }, { "epoch": 0.44248450838101155, "grad_norm": 7.320380266248527, "learning_rate": 6.1637824283606314e-06, "loss": 18.2428, "step": 24207 }, { "epoch": 0.4425027875774581, "grad_norm": 7.917832202990051, "learning_rate": 6.163494544726195e-06, "loss": 17.5665, "step": 24208 }, { "epoch": 0.4425210667739046, "grad_norm": 7.062214357481698, "learning_rate": 6.163206657013811e-06, "loss": 17.871, "step": 24209 }, { "epoch": 0.44253934597035116, "grad_norm": 7.780263464878502, "learning_rate": 6.162918765224488e-06, "loss": 17.4316, "step": 24210 }, { "epoch": 0.44255762516679764, "grad_norm": 6.833201342721086, "learning_rate": 6.162630869359236e-06, "loss": 17.5612, "step": 24211 }, { "epoch": 0.4425759043632442, "grad_norm": 6.497135885491084, "learning_rate": 6.162342969419064e-06, "loss": 17.6416, "step": 24212 }, { "epoch": 0.4425941835596907, "grad_norm": 5.856626806966802, "learning_rate": 6.162055065404981e-06, "loss": 17.2533, "step": 24213 }, { "epoch": 0.44261246275613725, "grad_norm": 6.313626717112335, "learning_rate": 6.161767157317996e-06, "loss": 17.6512, "step": 24214 }, { "epoch": 0.4426307419525838, "grad_norm": 7.22027848710395, "learning_rate": 6.161479245159115e-06, "loss": 17.7917, "step": 24215 }, { "epoch": 0.44264902114903026, "grad_norm": 6.204356458413, "learning_rate": 6.161191328929354e-06, "loss": 17.5948, "step": 24216 }, { "epoch": 0.4426673003454768, "grad_norm": 7.258930959726999, "learning_rate": 6.160903408629716e-06, "loss": 17.3764, "step": 24217 }, { "epoch": 0.44268557954192334, "grad_norm": 8.039318793161836, "learning_rate": 6.160615484261213e-06, "loss": 17.8878, "step": 24218 }, { "epoch": 0.44270385873836987, "grad_norm": 5.714886397409613, "learning_rate": 6.160327555824853e-06, "loss": 17.2314, "step": 24219 }, { "epoch": 0.4427221379348164, "grad_norm": 5.866988746985408, "learning_rate": 6.160039623321645e-06, "loss": 17.38, "step": 24220 }, { "epoch": 0.4427404171312629, "grad_norm": 5.424162144011653, "learning_rate": 6.159751686752601e-06, "loss": 17.0537, "step": 24221 }, { "epoch": 0.4427586963277094, "grad_norm": 6.119947301657276, "learning_rate": 6.159463746118726e-06, "loss": 17.5312, "step": 24222 }, { "epoch": 0.44277697552415596, "grad_norm": 5.83570647973294, "learning_rate": 6.159175801421031e-06, "loss": 17.1355, "step": 24223 }, { "epoch": 0.4427952547206025, "grad_norm": 7.079606293471452, "learning_rate": 6.1588878526605265e-06, "loss": 17.6112, "step": 24224 }, { "epoch": 0.44281353391704903, "grad_norm": 6.662479255800925, "learning_rate": 6.15859989983822e-06, "loss": 17.9218, "step": 24225 }, { "epoch": 0.4428318131134955, "grad_norm": 5.76213768489833, "learning_rate": 6.158311942955122e-06, "loss": 17.2206, "step": 24226 }, { "epoch": 0.44285009230994204, "grad_norm": 7.465509608667722, "learning_rate": 6.1580239820122414e-06, "loss": 17.8642, "step": 24227 }, { "epoch": 0.4428683715063886, "grad_norm": 7.247346089799373, "learning_rate": 6.157736017010587e-06, "loss": 17.7723, "step": 24228 }, { "epoch": 0.4428866507028351, "grad_norm": 4.697584188159765, "learning_rate": 6.157448047951166e-06, "loss": 16.8858, "step": 24229 }, { "epoch": 0.44290492989928165, "grad_norm": 8.43424148909306, "learning_rate": 6.157160074834992e-06, "loss": 18.319, "step": 24230 }, { "epoch": 0.44292320909572813, "grad_norm": 6.109837674523738, "learning_rate": 6.156872097663073e-06, "loss": 17.1626, "step": 24231 }, { "epoch": 0.44294148829217467, "grad_norm": 6.568676897566294, "learning_rate": 6.1565841164364185e-06, "loss": 17.2407, "step": 24232 }, { "epoch": 0.4429597674886212, "grad_norm": 5.922361169144793, "learning_rate": 6.156296131156036e-06, "loss": 17.0792, "step": 24233 }, { "epoch": 0.44297804668506774, "grad_norm": 7.670628458841614, "learning_rate": 6.156008141822933e-06, "loss": 18.2302, "step": 24234 }, { "epoch": 0.4429963258815143, "grad_norm": 5.4385676906398555, "learning_rate": 6.155720148438126e-06, "loss": 17.2284, "step": 24235 }, { "epoch": 0.44301460507796075, "grad_norm": 6.069382087305104, "learning_rate": 6.155432151002618e-06, "loss": 17.2016, "step": 24236 }, { "epoch": 0.4430328842744073, "grad_norm": 5.568902653557678, "learning_rate": 6.15514414951742e-06, "loss": 17.0489, "step": 24237 }, { "epoch": 0.4430511634708538, "grad_norm": 6.347852184975662, "learning_rate": 6.154856143983544e-06, "loss": 17.5024, "step": 24238 }, { "epoch": 0.44306944266730036, "grad_norm": 6.4876102673276534, "learning_rate": 6.154568134401996e-06, "loss": 17.4501, "step": 24239 }, { "epoch": 0.4430877218637469, "grad_norm": 5.946846618681404, "learning_rate": 6.154280120773787e-06, "loss": 17.3774, "step": 24240 }, { "epoch": 0.4431060010601934, "grad_norm": 6.819805116977419, "learning_rate": 6.1539921030999276e-06, "loss": 17.2599, "step": 24241 }, { "epoch": 0.4431242802566399, "grad_norm": 6.4322762761306285, "learning_rate": 6.153704081381424e-06, "loss": 17.499, "step": 24242 }, { "epoch": 0.44314255945308645, "grad_norm": 7.1095276575998, "learning_rate": 6.153416055619289e-06, "loss": 17.6187, "step": 24243 }, { "epoch": 0.443160838649533, "grad_norm": 5.3262334183648665, "learning_rate": 6.15312802581453e-06, "loss": 16.8974, "step": 24244 }, { "epoch": 0.44317911784597946, "grad_norm": 6.892633490560132, "learning_rate": 6.152839991968159e-06, "loss": 17.5396, "step": 24245 }, { "epoch": 0.443197397042426, "grad_norm": 6.6684426234079615, "learning_rate": 6.152551954081183e-06, "loss": 17.5883, "step": 24246 }, { "epoch": 0.44321567623887254, "grad_norm": 5.857402012260117, "learning_rate": 6.152263912154611e-06, "loss": 17.3895, "step": 24247 }, { "epoch": 0.44323395543531907, "grad_norm": 6.58462638691802, "learning_rate": 6.151975866189455e-06, "loss": 17.6756, "step": 24248 }, { "epoch": 0.4432522346317656, "grad_norm": 8.599941133848265, "learning_rate": 6.151687816186725e-06, "loss": 18.5927, "step": 24249 }, { "epoch": 0.4432705138282121, "grad_norm": 6.610459640862656, "learning_rate": 6.151399762147428e-06, "loss": 17.3349, "step": 24250 }, { "epoch": 0.4432887930246586, "grad_norm": 6.320284833880291, "learning_rate": 6.151111704072574e-06, "loss": 17.5565, "step": 24251 }, { "epoch": 0.44330707222110516, "grad_norm": 6.754158347300993, "learning_rate": 6.150823641963174e-06, "loss": 17.9968, "step": 24252 }, { "epoch": 0.4433253514175517, "grad_norm": 6.009291161879157, "learning_rate": 6.150535575820237e-06, "loss": 17.4278, "step": 24253 }, { "epoch": 0.44334363061399823, "grad_norm": 6.194526641385256, "learning_rate": 6.150247505644773e-06, "loss": 17.327, "step": 24254 }, { "epoch": 0.4433619098104447, "grad_norm": 8.857128349580567, "learning_rate": 6.149959431437791e-06, "loss": 18.4417, "step": 24255 }, { "epoch": 0.44338018900689125, "grad_norm": 5.916802344099659, "learning_rate": 6.149671353200301e-06, "loss": 17.2905, "step": 24256 }, { "epoch": 0.4433984682033378, "grad_norm": 5.3010549313235895, "learning_rate": 6.149383270933311e-06, "loss": 17.1912, "step": 24257 }, { "epoch": 0.4434167473997843, "grad_norm": 6.769126587055146, "learning_rate": 6.149095184637834e-06, "loss": 18.0512, "step": 24258 }, { "epoch": 0.44343502659623085, "grad_norm": 6.034267195322498, "learning_rate": 6.148807094314879e-06, "loss": 17.3947, "step": 24259 }, { "epoch": 0.44345330579267733, "grad_norm": 6.846131522263476, "learning_rate": 6.148518999965454e-06, "loss": 17.6969, "step": 24260 }, { "epoch": 0.44347158498912387, "grad_norm": 6.654444558424248, "learning_rate": 6.148230901590568e-06, "loss": 17.5979, "step": 24261 }, { "epoch": 0.4434898641855704, "grad_norm": 5.927856863203363, "learning_rate": 6.147942799191235e-06, "loss": 17.3457, "step": 24262 }, { "epoch": 0.44350814338201694, "grad_norm": 5.881571836399706, "learning_rate": 6.147654692768461e-06, "loss": 17.3193, "step": 24263 }, { "epoch": 0.4435264225784635, "grad_norm": 6.040106129984872, "learning_rate": 6.1473665823232565e-06, "loss": 17.1444, "step": 24264 }, { "epoch": 0.44354470177490996, "grad_norm": 6.189774380879795, "learning_rate": 6.147078467856632e-06, "loss": 17.3509, "step": 24265 }, { "epoch": 0.4435629809713565, "grad_norm": 6.826063409108439, "learning_rate": 6.146790349369597e-06, "loss": 17.5885, "step": 24266 }, { "epoch": 0.443581260167803, "grad_norm": 6.001597727562739, "learning_rate": 6.146502226863161e-06, "loss": 17.4777, "step": 24267 }, { "epoch": 0.44359953936424956, "grad_norm": 8.585103529418781, "learning_rate": 6.146214100338335e-06, "loss": 18.3625, "step": 24268 }, { "epoch": 0.4436178185606961, "grad_norm": 6.588201065544275, "learning_rate": 6.1459259697961275e-06, "loss": 17.3728, "step": 24269 }, { "epoch": 0.4436360977571426, "grad_norm": 6.145629592495774, "learning_rate": 6.145637835237549e-06, "loss": 17.6649, "step": 24270 }, { "epoch": 0.4436543769535891, "grad_norm": 6.569078665093694, "learning_rate": 6.145349696663608e-06, "loss": 17.7227, "step": 24271 }, { "epoch": 0.44367265615003565, "grad_norm": 7.128844505822352, "learning_rate": 6.145061554075318e-06, "loss": 17.815, "step": 24272 }, { "epoch": 0.4436909353464822, "grad_norm": 5.4845654340425956, "learning_rate": 6.144773407473686e-06, "loss": 17.1778, "step": 24273 }, { "epoch": 0.4437092145429287, "grad_norm": 5.535464992401839, "learning_rate": 6.144485256859722e-06, "loss": 17.23, "step": 24274 }, { "epoch": 0.4437274937393752, "grad_norm": 5.522928988988442, "learning_rate": 6.144197102234436e-06, "loss": 17.0113, "step": 24275 }, { "epoch": 0.44374577293582174, "grad_norm": 6.9864170491744995, "learning_rate": 6.14390894359884e-06, "loss": 17.6287, "step": 24276 }, { "epoch": 0.44376405213226827, "grad_norm": 7.583486048710066, "learning_rate": 6.143620780953941e-06, "loss": 17.9127, "step": 24277 }, { "epoch": 0.4437823313287148, "grad_norm": 6.465241793642631, "learning_rate": 6.143332614300751e-06, "loss": 17.4757, "step": 24278 }, { "epoch": 0.4438006105251613, "grad_norm": 7.142216981775892, "learning_rate": 6.143044443640278e-06, "loss": 17.6552, "step": 24279 }, { "epoch": 0.4438188897216078, "grad_norm": 5.9695328808129435, "learning_rate": 6.142756268973536e-06, "loss": 17.2828, "step": 24280 }, { "epoch": 0.44383716891805436, "grad_norm": 6.9498493225935025, "learning_rate": 6.142468090301531e-06, "loss": 17.6509, "step": 24281 }, { "epoch": 0.4438554481145009, "grad_norm": 5.187061746346155, "learning_rate": 6.142179907625274e-06, "loss": 16.9073, "step": 24282 }, { "epoch": 0.44387372731094743, "grad_norm": 6.568881953882317, "learning_rate": 6.141891720945776e-06, "loss": 17.2912, "step": 24283 }, { "epoch": 0.4438920065073939, "grad_norm": 10.60555002158264, "learning_rate": 6.141603530264046e-06, "loss": 17.8106, "step": 24284 }, { "epoch": 0.44391028570384045, "grad_norm": 8.03583234211266, "learning_rate": 6.141315335581096e-06, "loss": 17.9401, "step": 24285 }, { "epoch": 0.443928564900287, "grad_norm": 7.243956753975113, "learning_rate": 6.141027136897935e-06, "loss": 17.7223, "step": 24286 }, { "epoch": 0.4439468440967335, "grad_norm": 7.500720564815997, "learning_rate": 6.140738934215572e-06, "loss": 17.6781, "step": 24287 }, { "epoch": 0.44396512329318005, "grad_norm": 6.193149165372164, "learning_rate": 6.140450727535018e-06, "loss": 17.6141, "step": 24288 }, { "epoch": 0.44398340248962653, "grad_norm": 7.03293458091303, "learning_rate": 6.140162516857283e-06, "loss": 17.5132, "step": 24289 }, { "epoch": 0.44400168168607307, "grad_norm": 7.1255967726294385, "learning_rate": 6.139874302183379e-06, "loss": 17.7558, "step": 24290 }, { "epoch": 0.4440199608825196, "grad_norm": 6.825648015376803, "learning_rate": 6.1395860835143125e-06, "loss": 17.4193, "step": 24291 }, { "epoch": 0.44403824007896614, "grad_norm": 7.630204429301696, "learning_rate": 6.139297860851097e-06, "loss": 17.326, "step": 24292 }, { "epoch": 0.4440565192754127, "grad_norm": 6.944416049690852, "learning_rate": 6.139009634194739e-06, "loss": 17.7461, "step": 24293 }, { "epoch": 0.44407479847185916, "grad_norm": 6.009494017367086, "learning_rate": 6.138721403546252e-06, "loss": 17.4291, "step": 24294 }, { "epoch": 0.4440930776683057, "grad_norm": 6.813643389771506, "learning_rate": 6.1384331689066475e-06, "loss": 17.8385, "step": 24295 }, { "epoch": 0.4441113568647522, "grad_norm": 7.091083405823428, "learning_rate": 6.138144930276931e-06, "loss": 17.6301, "step": 24296 }, { "epoch": 0.44412963606119876, "grad_norm": 7.7125590736864815, "learning_rate": 6.137856687658117e-06, "loss": 17.3912, "step": 24297 }, { "epoch": 0.4441479152576453, "grad_norm": 6.410050452714684, "learning_rate": 6.137568441051214e-06, "loss": 17.7583, "step": 24298 }, { "epoch": 0.4441661944540918, "grad_norm": 6.996499164391479, "learning_rate": 6.137280190457231e-06, "loss": 17.683, "step": 24299 }, { "epoch": 0.4441844736505383, "grad_norm": 5.371767244130282, "learning_rate": 6.1369919358771805e-06, "loss": 17.1344, "step": 24300 }, { "epoch": 0.44420275284698485, "grad_norm": 6.341875127461749, "learning_rate": 6.136703677312071e-06, "loss": 17.2414, "step": 24301 }, { "epoch": 0.4442210320434314, "grad_norm": 6.628741169783307, "learning_rate": 6.136415414762915e-06, "loss": 17.8268, "step": 24302 }, { "epoch": 0.4442393112398779, "grad_norm": 5.533741679658401, "learning_rate": 6.13612714823072e-06, "loss": 17.3195, "step": 24303 }, { "epoch": 0.4442575904363244, "grad_norm": 6.661046720425952, "learning_rate": 6.1358388777165e-06, "loss": 17.4677, "step": 24304 }, { "epoch": 0.44427586963277094, "grad_norm": 5.47171707160309, "learning_rate": 6.1355506032212635e-06, "loss": 17.1375, "step": 24305 }, { "epoch": 0.4442941488292175, "grad_norm": 6.598453224838951, "learning_rate": 6.135262324746017e-06, "loss": 17.418, "step": 24306 }, { "epoch": 0.444312428025664, "grad_norm": 6.932034174488093, "learning_rate": 6.1349740422917785e-06, "loss": 17.4595, "step": 24307 }, { "epoch": 0.44433070722211054, "grad_norm": 6.785210180932653, "learning_rate": 6.134685755859553e-06, "loss": 17.5935, "step": 24308 }, { "epoch": 0.444348986418557, "grad_norm": 6.757859135595257, "learning_rate": 6.134397465450353e-06, "loss": 17.3384, "step": 24309 }, { "epoch": 0.44436726561500356, "grad_norm": 6.447510426771404, "learning_rate": 6.1341091710651866e-06, "loss": 17.4765, "step": 24310 }, { "epoch": 0.4443855448114501, "grad_norm": 7.09644440450274, "learning_rate": 6.133820872705068e-06, "loss": 17.3937, "step": 24311 }, { "epoch": 0.44440382400789663, "grad_norm": 5.429525403196993, "learning_rate": 6.133532570371005e-06, "loss": 17.2287, "step": 24312 }, { "epoch": 0.4444221032043431, "grad_norm": 10.01770545589729, "learning_rate": 6.133244264064007e-06, "loss": 17.9477, "step": 24313 }, { "epoch": 0.44444038240078965, "grad_norm": 5.171950826862728, "learning_rate": 6.132955953785089e-06, "loss": 16.983, "step": 24314 }, { "epoch": 0.4444586615972362, "grad_norm": 7.792712040468101, "learning_rate": 6.132667639535257e-06, "loss": 17.5489, "step": 24315 }, { "epoch": 0.4444769407936827, "grad_norm": 6.560204077919434, "learning_rate": 6.132379321315522e-06, "loss": 17.6316, "step": 24316 }, { "epoch": 0.44449521999012925, "grad_norm": 5.694203920575719, "learning_rate": 6.1320909991268984e-06, "loss": 17.1112, "step": 24317 }, { "epoch": 0.44451349918657573, "grad_norm": 5.907759863783654, "learning_rate": 6.131802672970394e-06, "loss": 17.4768, "step": 24318 }, { "epoch": 0.44453177838302227, "grad_norm": 5.786420166290249, "learning_rate": 6.131514342847018e-06, "loss": 17.2686, "step": 24319 }, { "epoch": 0.4445500575794688, "grad_norm": 5.972920879570193, "learning_rate": 6.131226008757781e-06, "loss": 17.2622, "step": 24320 }, { "epoch": 0.44456833677591534, "grad_norm": 7.229629167659487, "learning_rate": 6.1309376707036986e-06, "loss": 17.5661, "step": 24321 }, { "epoch": 0.4445866159723619, "grad_norm": 8.328817984606994, "learning_rate": 6.130649328685776e-06, "loss": 17.6267, "step": 24322 }, { "epoch": 0.44460489516880836, "grad_norm": 6.499535407976775, "learning_rate": 6.130360982705026e-06, "loss": 17.4157, "step": 24323 }, { "epoch": 0.4446231743652549, "grad_norm": 7.7690598525152765, "learning_rate": 6.130072632762458e-06, "loss": 18.2608, "step": 24324 }, { "epoch": 0.4446414535617014, "grad_norm": 8.653909364164772, "learning_rate": 6.129784278859083e-06, "loss": 18.9854, "step": 24325 }, { "epoch": 0.44465973275814796, "grad_norm": 6.04688392428587, "learning_rate": 6.129495920995913e-06, "loss": 17.3662, "step": 24326 }, { "epoch": 0.4446780119545945, "grad_norm": 5.861343213502779, "learning_rate": 6.129207559173958e-06, "loss": 17.3296, "step": 24327 }, { "epoch": 0.444696291151041, "grad_norm": 7.353721394555597, "learning_rate": 6.128919193394231e-06, "loss": 17.9263, "step": 24328 }, { "epoch": 0.4447145703474875, "grad_norm": 6.899622820319202, "learning_rate": 6.128630823657735e-06, "loss": 17.7727, "step": 24329 }, { "epoch": 0.44473284954393405, "grad_norm": 7.335581877620247, "learning_rate": 6.128342449965488e-06, "loss": 17.8733, "step": 24330 }, { "epoch": 0.4447511287403806, "grad_norm": 5.702965774210911, "learning_rate": 6.1280540723185e-06, "loss": 17.1145, "step": 24331 }, { "epoch": 0.4447694079368271, "grad_norm": 6.254112132783163, "learning_rate": 6.127765690717781e-06, "loss": 17.3912, "step": 24332 }, { "epoch": 0.4447876871332736, "grad_norm": 6.160431164341467, "learning_rate": 6.127477305164339e-06, "loss": 17.5638, "step": 24333 }, { "epoch": 0.44480596632972014, "grad_norm": 4.911584992933277, "learning_rate": 6.127188915659186e-06, "loss": 16.855, "step": 24334 }, { "epoch": 0.4448242455261667, "grad_norm": 7.166647929728161, "learning_rate": 6.126900522203336e-06, "loss": 17.8046, "step": 24335 }, { "epoch": 0.4448425247226132, "grad_norm": 7.861798426693862, "learning_rate": 6.126612124797797e-06, "loss": 18.479, "step": 24336 }, { "epoch": 0.44486080391905974, "grad_norm": 6.754642858429004, "learning_rate": 6.12632372344358e-06, "loss": 17.7218, "step": 24337 }, { "epoch": 0.4448790831155062, "grad_norm": 7.082069116742022, "learning_rate": 6.126035318141694e-06, "loss": 17.8935, "step": 24338 }, { "epoch": 0.44489736231195276, "grad_norm": 5.984734050673375, "learning_rate": 6.1257469088931556e-06, "loss": 17.2154, "step": 24339 }, { "epoch": 0.4449156415083993, "grad_norm": 6.255999503580697, "learning_rate": 6.125458495698971e-06, "loss": 17.597, "step": 24340 }, { "epoch": 0.44493392070484583, "grad_norm": 5.921269898866655, "learning_rate": 6.12517007856015e-06, "loss": 17.6401, "step": 24341 }, { "epoch": 0.44495219990129237, "grad_norm": 8.369976768902918, "learning_rate": 6.124881657477707e-06, "loss": 17.893, "step": 24342 }, { "epoch": 0.44497047909773885, "grad_norm": 6.758463596355022, "learning_rate": 6.124593232452652e-06, "loss": 17.5642, "step": 24343 }, { "epoch": 0.4449887582941854, "grad_norm": 5.48942460446099, "learning_rate": 6.124304803485994e-06, "loss": 17.0318, "step": 24344 }, { "epoch": 0.4450070374906319, "grad_norm": 7.397430182434983, "learning_rate": 6.124016370578747e-06, "loss": 17.5278, "step": 24345 }, { "epoch": 0.44502531668707845, "grad_norm": 6.604856253075043, "learning_rate": 6.123727933731918e-06, "loss": 17.6773, "step": 24346 }, { "epoch": 0.44504359588352493, "grad_norm": 6.016318657053202, "learning_rate": 6.1234394929465206e-06, "loss": 17.0158, "step": 24347 }, { "epoch": 0.44506187507997147, "grad_norm": 5.993272945402518, "learning_rate": 6.123151048223565e-06, "loss": 17.2964, "step": 24348 }, { "epoch": 0.445080154276418, "grad_norm": 7.895884841218638, "learning_rate": 6.1228625995640645e-06, "loss": 18.2877, "step": 24349 }, { "epoch": 0.44509843347286454, "grad_norm": 7.822172859346337, "learning_rate": 6.122574146969026e-06, "loss": 17.9766, "step": 24350 }, { "epoch": 0.4451167126693111, "grad_norm": 4.894234732054747, "learning_rate": 6.122285690439464e-06, "loss": 16.969, "step": 24351 }, { "epoch": 0.44513499186575756, "grad_norm": 5.545973586692187, "learning_rate": 6.121997229976387e-06, "loss": 17.0296, "step": 24352 }, { "epoch": 0.4451532710622041, "grad_norm": 7.603804127291949, "learning_rate": 6.121708765580807e-06, "loss": 17.7488, "step": 24353 }, { "epoch": 0.44517155025865063, "grad_norm": 6.185939227796631, "learning_rate": 6.121420297253735e-06, "loss": 17.534, "step": 24354 }, { "epoch": 0.44518982945509716, "grad_norm": 5.910913781325826, "learning_rate": 6.121131824996183e-06, "loss": 17.3078, "step": 24355 }, { "epoch": 0.4452081086515437, "grad_norm": 5.5531843054562, "learning_rate": 6.1208433488091604e-06, "loss": 17.0702, "step": 24356 }, { "epoch": 0.4452263878479902, "grad_norm": 6.177239257868246, "learning_rate": 6.12055486869368e-06, "loss": 17.5976, "step": 24357 }, { "epoch": 0.4452446670444367, "grad_norm": 6.5302201683004935, "learning_rate": 6.1202663846507505e-06, "loss": 17.3438, "step": 24358 }, { "epoch": 0.44526294624088325, "grad_norm": 6.506877468055951, "learning_rate": 6.119977896681387e-06, "loss": 17.9006, "step": 24359 }, { "epoch": 0.4452812254373298, "grad_norm": 7.115421182349851, "learning_rate": 6.1196894047865964e-06, "loss": 17.5706, "step": 24360 }, { "epoch": 0.4452995046337763, "grad_norm": 6.869706757941675, "learning_rate": 6.119400908967391e-06, "loss": 17.8241, "step": 24361 }, { "epoch": 0.4453177838302228, "grad_norm": 6.170737916919833, "learning_rate": 6.119112409224783e-06, "loss": 17.3096, "step": 24362 }, { "epoch": 0.44533606302666934, "grad_norm": 6.320784406038538, "learning_rate": 6.118823905559785e-06, "loss": 17.5181, "step": 24363 }, { "epoch": 0.4453543422231159, "grad_norm": 7.279107281238489, "learning_rate": 6.1185353979734055e-06, "loss": 17.5462, "step": 24364 }, { "epoch": 0.4453726214195624, "grad_norm": 6.995689565213354, "learning_rate": 6.118246886466655e-06, "loss": 17.8895, "step": 24365 }, { "epoch": 0.44539090061600894, "grad_norm": 7.393614652143189, "learning_rate": 6.117958371040548e-06, "loss": 17.9326, "step": 24366 }, { "epoch": 0.4454091798124554, "grad_norm": 7.519159012904411, "learning_rate": 6.1176698516960916e-06, "loss": 17.8713, "step": 24367 }, { "epoch": 0.44542745900890196, "grad_norm": 5.349614964005685, "learning_rate": 6.117381328434302e-06, "loss": 17.0897, "step": 24368 }, { "epoch": 0.4454457382053485, "grad_norm": 5.568668906025615, "learning_rate": 6.117092801256186e-06, "loss": 17.0726, "step": 24369 }, { "epoch": 0.44546401740179503, "grad_norm": 5.97316198049961, "learning_rate": 6.1168042701627574e-06, "loss": 17.3952, "step": 24370 }, { "epoch": 0.44548229659824157, "grad_norm": 5.999665770267773, "learning_rate": 6.116515735155026e-06, "loss": 17.5104, "step": 24371 }, { "epoch": 0.44550057579468805, "grad_norm": 7.194786978556543, "learning_rate": 6.116227196234005e-06, "loss": 17.5944, "step": 24372 }, { "epoch": 0.4455188549911346, "grad_norm": 5.967959370906969, "learning_rate": 6.115938653400705e-06, "loss": 17.2163, "step": 24373 }, { "epoch": 0.4455371341875811, "grad_norm": 6.690550135980042, "learning_rate": 6.115650106656134e-06, "loss": 17.3666, "step": 24374 }, { "epoch": 0.44555541338402765, "grad_norm": 5.803152695053935, "learning_rate": 6.115361556001308e-06, "loss": 17.4216, "step": 24375 }, { "epoch": 0.4455736925804742, "grad_norm": 6.895514055682177, "learning_rate": 6.1150730014372375e-06, "loss": 17.557, "step": 24376 }, { "epoch": 0.44559197177692067, "grad_norm": 6.313370438085639, "learning_rate": 6.114784442964932e-06, "loss": 17.2118, "step": 24377 }, { "epoch": 0.4456102509733672, "grad_norm": 6.0538085555126235, "learning_rate": 6.114495880585404e-06, "loss": 17.1377, "step": 24378 }, { "epoch": 0.44562853016981374, "grad_norm": 6.642174924296055, "learning_rate": 6.114207314299662e-06, "loss": 17.888, "step": 24379 }, { "epoch": 0.4456468093662603, "grad_norm": 7.1705988045603, "learning_rate": 6.1139187441087246e-06, "loss": 17.6493, "step": 24380 }, { "epoch": 0.44566508856270676, "grad_norm": 6.5318335551960915, "learning_rate": 6.113630170013596e-06, "loss": 17.3605, "step": 24381 }, { "epoch": 0.4456833677591533, "grad_norm": 7.349949186536805, "learning_rate": 6.11334159201529e-06, "loss": 18.0314, "step": 24382 }, { "epoch": 0.44570164695559983, "grad_norm": 6.590783839230621, "learning_rate": 6.11305301011482e-06, "loss": 17.5504, "step": 24383 }, { "epoch": 0.44571992615204636, "grad_norm": 6.166497304071057, "learning_rate": 6.1127644243131945e-06, "loss": 17.3252, "step": 24384 }, { "epoch": 0.4457382053484929, "grad_norm": 7.171042314064051, "learning_rate": 6.112475834611426e-06, "loss": 17.6472, "step": 24385 }, { "epoch": 0.4457564845449394, "grad_norm": 7.9448358019926735, "learning_rate": 6.112187241010527e-06, "loss": 18.3846, "step": 24386 }, { "epoch": 0.4457747637413859, "grad_norm": 6.725274435039454, "learning_rate": 6.111898643511509e-06, "loss": 17.7046, "step": 24387 }, { "epoch": 0.44579304293783245, "grad_norm": 6.3831455941825554, "learning_rate": 6.111610042115381e-06, "loss": 17.5592, "step": 24388 }, { "epoch": 0.445811322134279, "grad_norm": 5.259620222601401, "learning_rate": 6.111321436823157e-06, "loss": 17.1178, "step": 24389 }, { "epoch": 0.4458296013307255, "grad_norm": 5.712018163076865, "learning_rate": 6.11103282763585e-06, "loss": 17.2385, "step": 24390 }, { "epoch": 0.445847880527172, "grad_norm": 6.489322288991205, "learning_rate": 6.110744214554467e-06, "loss": 17.3642, "step": 24391 }, { "epoch": 0.44586615972361854, "grad_norm": 6.893296290587616, "learning_rate": 6.110455597580022e-06, "loss": 17.5147, "step": 24392 }, { "epoch": 0.4458844389200651, "grad_norm": 5.634470184937584, "learning_rate": 6.110166976713525e-06, "loss": 17.1702, "step": 24393 }, { "epoch": 0.4459027181165116, "grad_norm": 7.946888191806957, "learning_rate": 6.109878351955992e-06, "loss": 18.0497, "step": 24394 }, { "epoch": 0.44592099731295815, "grad_norm": 7.873532003521282, "learning_rate": 6.10958972330843e-06, "loss": 18.1097, "step": 24395 }, { "epoch": 0.4459392765094046, "grad_norm": 8.331721818258567, "learning_rate": 6.109301090771853e-06, "loss": 17.5412, "step": 24396 }, { "epoch": 0.44595755570585116, "grad_norm": 6.31519033379773, "learning_rate": 6.109012454347272e-06, "loss": 17.3509, "step": 24397 }, { "epoch": 0.4459758349022977, "grad_norm": 8.558776793205876, "learning_rate": 6.108723814035697e-06, "loss": 18.4301, "step": 24398 }, { "epoch": 0.44599411409874423, "grad_norm": 6.507503840989197, "learning_rate": 6.108435169838143e-06, "loss": 17.6548, "step": 24399 }, { "epoch": 0.44601239329519077, "grad_norm": 5.944197870978429, "learning_rate": 6.108146521755619e-06, "loss": 17.2269, "step": 24400 }, { "epoch": 0.44603067249163725, "grad_norm": 5.1432332411241815, "learning_rate": 6.107857869789139e-06, "loss": 17.158, "step": 24401 }, { "epoch": 0.4460489516880838, "grad_norm": 6.723672584918994, "learning_rate": 6.107569213939712e-06, "loss": 17.4827, "step": 24402 }, { "epoch": 0.4460672308845303, "grad_norm": 8.294100445087008, "learning_rate": 6.107280554208351e-06, "loss": 17.8669, "step": 24403 }, { "epoch": 0.44608551008097685, "grad_norm": 7.0036918377540776, "learning_rate": 6.106991890596069e-06, "loss": 17.6244, "step": 24404 }, { "epoch": 0.4461037892774234, "grad_norm": 6.762475181686122, "learning_rate": 6.106703223103876e-06, "loss": 17.6889, "step": 24405 }, { "epoch": 0.44612206847386987, "grad_norm": 7.7299651463420425, "learning_rate": 6.106414551732782e-06, "loss": 17.7296, "step": 24406 }, { "epoch": 0.4461403476703164, "grad_norm": 5.927754323053762, "learning_rate": 6.1061258764838025e-06, "loss": 17.4274, "step": 24407 }, { "epoch": 0.44615862686676294, "grad_norm": 7.176967531030489, "learning_rate": 6.105837197357949e-06, "loss": 17.7028, "step": 24408 }, { "epoch": 0.4461769060632095, "grad_norm": 8.012934018334555, "learning_rate": 6.105548514356232e-06, "loss": 18.5327, "step": 24409 }, { "epoch": 0.446195185259656, "grad_norm": 7.480270741527926, "learning_rate": 6.105259827479662e-06, "loss": 17.9369, "step": 24410 }, { "epoch": 0.4462134644561025, "grad_norm": 6.2645173973984996, "learning_rate": 6.104971136729253e-06, "loss": 17.4705, "step": 24411 }, { "epoch": 0.44623174365254903, "grad_norm": 6.237915343950155, "learning_rate": 6.104682442106016e-06, "loss": 17.6889, "step": 24412 }, { "epoch": 0.44625002284899556, "grad_norm": 5.41888111081362, "learning_rate": 6.1043937436109626e-06, "loss": 17.0488, "step": 24413 }, { "epoch": 0.4462683020454421, "grad_norm": 5.957470474484199, "learning_rate": 6.104105041245106e-06, "loss": 17.3516, "step": 24414 }, { "epoch": 0.4462865812418886, "grad_norm": 5.838828191115028, "learning_rate": 6.103816335009455e-06, "loss": 17.174, "step": 24415 }, { "epoch": 0.4463048604383351, "grad_norm": 6.069141699449847, "learning_rate": 6.1035276249050246e-06, "loss": 17.0595, "step": 24416 }, { "epoch": 0.44632313963478165, "grad_norm": 7.87473920273802, "learning_rate": 6.103238910932825e-06, "loss": 17.7671, "step": 24417 }, { "epoch": 0.4463414188312282, "grad_norm": 6.3488164438989685, "learning_rate": 6.102950193093871e-06, "loss": 17.5785, "step": 24418 }, { "epoch": 0.4463596980276747, "grad_norm": 5.646912176149879, "learning_rate": 6.102661471389171e-06, "loss": 17.1245, "step": 24419 }, { "epoch": 0.4463779772241212, "grad_norm": 5.528284778019849, "learning_rate": 6.1023727458197355e-06, "loss": 17.166, "step": 24420 }, { "epoch": 0.44639625642056774, "grad_norm": 6.485533940654361, "learning_rate": 6.102084016386583e-06, "loss": 17.707, "step": 24421 }, { "epoch": 0.4464145356170143, "grad_norm": 7.741099090791554, "learning_rate": 6.101795283090721e-06, "loss": 18.1211, "step": 24422 }, { "epoch": 0.4464328148134608, "grad_norm": 6.9229104341689025, "learning_rate": 6.101506545933161e-06, "loss": 17.6524, "step": 24423 }, { "epoch": 0.44645109400990735, "grad_norm": 6.377767345759849, "learning_rate": 6.101217804914917e-06, "loss": 17.6627, "step": 24424 }, { "epoch": 0.4464693732063538, "grad_norm": 5.490576776855622, "learning_rate": 6.1009290600369995e-06, "loss": 17.3771, "step": 24425 }, { "epoch": 0.44648765240280036, "grad_norm": 5.651945789549618, "learning_rate": 6.100640311300421e-06, "loss": 17.0331, "step": 24426 }, { "epoch": 0.4465059315992469, "grad_norm": 5.449373409515362, "learning_rate": 6.100351558706194e-06, "loss": 17.0476, "step": 24427 }, { "epoch": 0.44652421079569343, "grad_norm": 8.799801754872561, "learning_rate": 6.100062802255331e-06, "loss": 17.9051, "step": 24428 }, { "epoch": 0.44654248999213997, "grad_norm": 5.901880458321972, "learning_rate": 6.099774041948843e-06, "loss": 17.3573, "step": 24429 }, { "epoch": 0.44656076918858645, "grad_norm": 8.634143831220134, "learning_rate": 6.099485277787741e-06, "loss": 18.4925, "step": 24430 }, { "epoch": 0.446579048385033, "grad_norm": 7.463467214231075, "learning_rate": 6.09919650977304e-06, "loss": 17.6599, "step": 24431 }, { "epoch": 0.4465973275814795, "grad_norm": 5.546747571313554, "learning_rate": 6.0989077379057516e-06, "loss": 17.301, "step": 24432 }, { "epoch": 0.44661560677792606, "grad_norm": 6.397673045441741, "learning_rate": 6.098618962186884e-06, "loss": 17.5818, "step": 24433 }, { "epoch": 0.4466338859743726, "grad_norm": 6.2708999992759145, "learning_rate": 6.098330182617453e-06, "loss": 17.3073, "step": 24434 }, { "epoch": 0.44665216517081907, "grad_norm": 6.074037112045683, "learning_rate": 6.098041399198473e-06, "loss": 17.3755, "step": 24435 }, { "epoch": 0.4466704443672656, "grad_norm": 5.556667156349534, "learning_rate": 6.097752611930951e-06, "loss": 17.13, "step": 24436 }, { "epoch": 0.44668872356371214, "grad_norm": 6.359632585710636, "learning_rate": 6.097463820815901e-06, "loss": 17.6939, "step": 24437 }, { "epoch": 0.4467070027601587, "grad_norm": 6.201687773142437, "learning_rate": 6.0971750258543346e-06, "loss": 17.4796, "step": 24438 }, { "epoch": 0.4467252819566052, "grad_norm": 5.467661989849664, "learning_rate": 6.096886227047267e-06, "loss": 16.8875, "step": 24439 }, { "epoch": 0.4467435611530517, "grad_norm": 7.009486146427265, "learning_rate": 6.0965974243957086e-06, "loss": 17.646, "step": 24440 }, { "epoch": 0.44676184034949823, "grad_norm": 7.013412644793047, "learning_rate": 6.09630861790067e-06, "loss": 17.7202, "step": 24441 }, { "epoch": 0.44678011954594476, "grad_norm": 6.289208607952947, "learning_rate": 6.096019807563165e-06, "loss": 17.4421, "step": 24442 }, { "epoch": 0.4467983987423913, "grad_norm": 6.966372039594387, "learning_rate": 6.0957309933842065e-06, "loss": 17.7467, "step": 24443 }, { "epoch": 0.44681667793883784, "grad_norm": 6.863189672967621, "learning_rate": 6.0954421753648056e-06, "loss": 17.4474, "step": 24444 }, { "epoch": 0.4468349571352843, "grad_norm": 4.46617239274621, "learning_rate": 6.095153353505976e-06, "loss": 16.6909, "step": 24445 }, { "epoch": 0.44685323633173085, "grad_norm": 5.825656605819203, "learning_rate": 6.094864527808727e-06, "loss": 17.0685, "step": 24446 }, { "epoch": 0.4468715155281774, "grad_norm": 4.714162601204519, "learning_rate": 6.0945756982740725e-06, "loss": 16.9861, "step": 24447 }, { "epoch": 0.4468897947246239, "grad_norm": 6.777702908399464, "learning_rate": 6.094286864903026e-06, "loss": 17.6843, "step": 24448 }, { "epoch": 0.4469080739210704, "grad_norm": 6.834913335229522, "learning_rate": 6.0939980276966e-06, "loss": 17.7261, "step": 24449 }, { "epoch": 0.44692635311751694, "grad_norm": 6.745095667864185, "learning_rate": 6.093709186655805e-06, "loss": 17.845, "step": 24450 }, { "epoch": 0.4469446323139635, "grad_norm": 5.955249358373833, "learning_rate": 6.093420341781655e-06, "loss": 17.3205, "step": 24451 }, { "epoch": 0.44696291151041, "grad_norm": 7.981867678401039, "learning_rate": 6.0931314930751606e-06, "loss": 17.9401, "step": 24452 }, { "epoch": 0.44698119070685655, "grad_norm": 7.955055418987271, "learning_rate": 6.092842640537336e-06, "loss": 18.6077, "step": 24453 }, { "epoch": 0.446999469903303, "grad_norm": 8.27575083597538, "learning_rate": 6.0925537841691906e-06, "loss": 18.028, "step": 24454 }, { "epoch": 0.44701774909974956, "grad_norm": 6.397660865664258, "learning_rate": 6.092264923971742e-06, "loss": 17.3809, "step": 24455 }, { "epoch": 0.4470360282961961, "grad_norm": 6.529630593388632, "learning_rate": 6.091976059945998e-06, "loss": 17.7047, "step": 24456 }, { "epoch": 0.44705430749264263, "grad_norm": 6.15868748906537, "learning_rate": 6.091687192092972e-06, "loss": 17.0119, "step": 24457 }, { "epoch": 0.44707258668908917, "grad_norm": 7.1100094140139865, "learning_rate": 6.091398320413679e-06, "loss": 17.6149, "step": 24458 }, { "epoch": 0.44709086588553565, "grad_norm": 7.13538518829046, "learning_rate": 6.091109444909129e-06, "loss": 17.9354, "step": 24459 }, { "epoch": 0.4471091450819822, "grad_norm": 10.354510507253659, "learning_rate": 6.090820565580333e-06, "loss": 18.9472, "step": 24460 }, { "epoch": 0.4471274242784287, "grad_norm": 7.783071261002511, "learning_rate": 6.090531682428306e-06, "loss": 18.0399, "step": 24461 }, { "epoch": 0.44714570347487526, "grad_norm": 6.401935706870166, "learning_rate": 6.090242795454062e-06, "loss": 17.436, "step": 24462 }, { "epoch": 0.4471639826713218, "grad_norm": 4.558443586842041, "learning_rate": 6.089953904658612e-06, "loss": 16.6771, "step": 24463 }, { "epoch": 0.44718226186776827, "grad_norm": 6.583926730349258, "learning_rate": 6.089665010042968e-06, "loss": 17.4754, "step": 24464 }, { "epoch": 0.4472005410642148, "grad_norm": 8.632820674805483, "learning_rate": 6.089376111608141e-06, "loss": 18.8808, "step": 24465 }, { "epoch": 0.44721882026066134, "grad_norm": 7.053539542181739, "learning_rate": 6.089087209355147e-06, "loss": 17.3104, "step": 24466 }, { "epoch": 0.4472370994571079, "grad_norm": 7.452025530751659, "learning_rate": 6.088798303284995e-06, "loss": 17.9288, "step": 24467 }, { "epoch": 0.4472553786535544, "grad_norm": 6.336280300716763, "learning_rate": 6.088509393398701e-06, "loss": 17.484, "step": 24468 }, { "epoch": 0.4472736578500009, "grad_norm": 6.085966663598267, "learning_rate": 6.088220479697274e-06, "loss": 16.9977, "step": 24469 }, { "epoch": 0.44729193704644743, "grad_norm": 5.044427867867533, "learning_rate": 6.087931562181731e-06, "loss": 16.9515, "step": 24470 }, { "epoch": 0.44731021624289397, "grad_norm": 7.232597980795171, "learning_rate": 6.087642640853081e-06, "loss": 17.8035, "step": 24471 }, { "epoch": 0.4473284954393405, "grad_norm": 5.801893114607026, "learning_rate": 6.087353715712337e-06, "loss": 17.2479, "step": 24472 }, { "epoch": 0.44734677463578704, "grad_norm": 8.394168635213568, "learning_rate": 6.087064786760516e-06, "loss": 18.3203, "step": 24473 }, { "epoch": 0.4473650538322335, "grad_norm": 6.320306523985644, "learning_rate": 6.086775853998623e-06, "loss": 17.3289, "step": 24474 }, { "epoch": 0.44738333302868005, "grad_norm": 8.609223173205471, "learning_rate": 6.086486917427678e-06, "loss": 18.4674, "step": 24475 }, { "epoch": 0.4474016122251266, "grad_norm": 6.310444867558349, "learning_rate": 6.086197977048689e-06, "loss": 17.4766, "step": 24476 }, { "epoch": 0.4474198914215731, "grad_norm": 6.807482670978645, "learning_rate": 6.085909032862671e-06, "loss": 17.4981, "step": 24477 }, { "epoch": 0.44743817061801966, "grad_norm": 7.996647139645574, "learning_rate": 6.0856200848706375e-06, "loss": 17.9355, "step": 24478 }, { "epoch": 0.44745644981446614, "grad_norm": 5.510315706969526, "learning_rate": 6.085331133073596e-06, "loss": 17.2577, "step": 24479 }, { "epoch": 0.4474747290109127, "grad_norm": 6.080993520086858, "learning_rate": 6.085042177472567e-06, "loss": 17.5098, "step": 24480 }, { "epoch": 0.4474930082073592, "grad_norm": 6.2517235184031215, "learning_rate": 6.084753218068557e-06, "loss": 17.4185, "step": 24481 }, { "epoch": 0.44751128740380575, "grad_norm": 5.88677226674453, "learning_rate": 6.084464254862582e-06, "loss": 17.347, "step": 24482 }, { "epoch": 0.4475295666002522, "grad_norm": 5.411578797994535, "learning_rate": 6.084175287855654e-06, "loss": 16.9384, "step": 24483 }, { "epoch": 0.44754784579669876, "grad_norm": 6.045058578793596, "learning_rate": 6.0838863170487846e-06, "loss": 17.1605, "step": 24484 }, { "epoch": 0.4475661249931453, "grad_norm": 6.455967636022619, "learning_rate": 6.083597342442989e-06, "loss": 17.7157, "step": 24485 }, { "epoch": 0.44758440418959183, "grad_norm": 6.179426288420043, "learning_rate": 6.083308364039279e-06, "loss": 17.6575, "step": 24486 }, { "epoch": 0.44760268338603837, "grad_norm": 5.652262027740546, "learning_rate": 6.083019381838666e-06, "loss": 17.0972, "step": 24487 }, { "epoch": 0.44762096258248485, "grad_norm": 5.54168902868514, "learning_rate": 6.082730395842165e-06, "loss": 16.751, "step": 24488 }, { "epoch": 0.4476392417789314, "grad_norm": 5.32191399402457, "learning_rate": 6.0824414060507865e-06, "loss": 16.9165, "step": 24489 }, { "epoch": 0.4476575209753779, "grad_norm": 5.216734970115658, "learning_rate": 6.082152412465546e-06, "loss": 16.8919, "step": 24490 }, { "epoch": 0.44767580017182446, "grad_norm": 5.817938937234099, "learning_rate": 6.0818634150874554e-06, "loss": 17.2652, "step": 24491 }, { "epoch": 0.447694079368271, "grad_norm": 7.522501099547367, "learning_rate": 6.081574413917527e-06, "loss": 17.6549, "step": 24492 }, { "epoch": 0.44771235856471747, "grad_norm": 5.931479171545933, "learning_rate": 6.081285408956773e-06, "loss": 17.3432, "step": 24493 }, { "epoch": 0.447730637761164, "grad_norm": 4.41491681837655, "learning_rate": 6.08099640020621e-06, "loss": 16.6533, "step": 24494 }, { "epoch": 0.44774891695761054, "grad_norm": 5.703115331221978, "learning_rate": 6.080707387666847e-06, "loss": 17.5828, "step": 24495 }, { "epoch": 0.4477671961540571, "grad_norm": 5.684241655245052, "learning_rate": 6.080418371339698e-06, "loss": 17.2495, "step": 24496 }, { "epoch": 0.4477854753505036, "grad_norm": 5.922163493167484, "learning_rate": 6.0801293512257765e-06, "loss": 17.3535, "step": 24497 }, { "epoch": 0.4478037545469501, "grad_norm": 6.7646213945996605, "learning_rate": 6.079840327326095e-06, "loss": 17.7324, "step": 24498 }, { "epoch": 0.44782203374339663, "grad_norm": 7.056552852321804, "learning_rate": 6.079551299641667e-06, "loss": 17.7943, "step": 24499 }, { "epoch": 0.44784031293984317, "grad_norm": 6.695332757115799, "learning_rate": 6.079262268173506e-06, "loss": 17.8052, "step": 24500 }, { "epoch": 0.4478585921362897, "grad_norm": 6.137749675423967, "learning_rate": 6.078973232922625e-06, "loss": 17.4122, "step": 24501 }, { "epoch": 0.44787687133273624, "grad_norm": 6.932955082810483, "learning_rate": 6.078684193890036e-06, "loss": 17.692, "step": 24502 }, { "epoch": 0.4478951505291827, "grad_norm": 7.238481088020135, "learning_rate": 6.078395151076751e-06, "loss": 17.6842, "step": 24503 }, { "epoch": 0.44791342972562925, "grad_norm": 6.353353648148848, "learning_rate": 6.078106104483787e-06, "loss": 17.4092, "step": 24504 }, { "epoch": 0.4479317089220758, "grad_norm": 5.394112992320563, "learning_rate": 6.077817054112153e-06, "loss": 17.0963, "step": 24505 }, { "epoch": 0.4479499881185223, "grad_norm": 6.6213831348714125, "learning_rate": 6.077527999962863e-06, "loss": 17.4994, "step": 24506 }, { "epoch": 0.44796826731496886, "grad_norm": 6.238180867135258, "learning_rate": 6.0772389420369315e-06, "loss": 17.5379, "step": 24507 }, { "epoch": 0.44798654651141534, "grad_norm": 6.918233957899457, "learning_rate": 6.076949880335373e-06, "loss": 17.6738, "step": 24508 }, { "epoch": 0.4480048257078619, "grad_norm": 5.578343451360341, "learning_rate": 6.0766608148591965e-06, "loss": 17.0726, "step": 24509 }, { "epoch": 0.4480231049043084, "grad_norm": 6.591828819491328, "learning_rate": 6.0763717456094185e-06, "loss": 17.4182, "step": 24510 }, { "epoch": 0.44804138410075495, "grad_norm": 5.434818719766534, "learning_rate": 6.0760826725870506e-06, "loss": 17.2368, "step": 24511 }, { "epoch": 0.4480596632972015, "grad_norm": 6.644255578742306, "learning_rate": 6.075793595793106e-06, "loss": 17.6142, "step": 24512 }, { "epoch": 0.44807794249364796, "grad_norm": 6.334112684827274, "learning_rate": 6.075504515228597e-06, "loss": 17.2672, "step": 24513 }, { "epoch": 0.4480962216900945, "grad_norm": 5.539732552447868, "learning_rate": 6.075215430894541e-06, "loss": 17.2791, "step": 24514 }, { "epoch": 0.44811450088654103, "grad_norm": 6.359708444980066, "learning_rate": 6.074926342791945e-06, "loss": 17.3534, "step": 24515 }, { "epoch": 0.44813278008298757, "grad_norm": 6.455018272419638, "learning_rate": 6.0746372509218264e-06, "loss": 17.3637, "step": 24516 }, { "epoch": 0.44815105927943405, "grad_norm": 4.894673736300759, "learning_rate": 6.074348155285198e-06, "loss": 16.7497, "step": 24517 }, { "epoch": 0.4481693384758806, "grad_norm": 6.136956746908434, "learning_rate": 6.074059055883074e-06, "loss": 17.667, "step": 24518 }, { "epoch": 0.4481876176723271, "grad_norm": 7.35503859221064, "learning_rate": 6.073769952716465e-06, "loss": 17.8807, "step": 24519 }, { "epoch": 0.44820589686877366, "grad_norm": 7.248474537555424, "learning_rate": 6.073480845786384e-06, "loss": 17.9835, "step": 24520 }, { "epoch": 0.4482241760652202, "grad_norm": 7.084398277839027, "learning_rate": 6.073191735093848e-06, "loss": 18.3454, "step": 24521 }, { "epoch": 0.4482424552616667, "grad_norm": 5.520930994534029, "learning_rate": 6.072902620639867e-06, "loss": 17.1094, "step": 24522 }, { "epoch": 0.4482607344581132, "grad_norm": 6.646691813263984, "learning_rate": 6.0726135024254555e-06, "loss": 17.3124, "step": 24523 }, { "epoch": 0.44827901365455974, "grad_norm": 6.089075594839369, "learning_rate": 6.072324380451626e-06, "loss": 17.284, "step": 24524 }, { "epoch": 0.4482972928510063, "grad_norm": 5.835320371397225, "learning_rate": 6.072035254719394e-06, "loss": 17.2227, "step": 24525 }, { "epoch": 0.4483155720474528, "grad_norm": 7.1010148801621185, "learning_rate": 6.0717461252297706e-06, "loss": 17.6833, "step": 24526 }, { "epoch": 0.4483338512438993, "grad_norm": 7.871774532027448, "learning_rate": 6.071456991983771e-06, "loss": 17.5686, "step": 24527 }, { "epoch": 0.44835213044034583, "grad_norm": 6.50344425580137, "learning_rate": 6.071167854982406e-06, "loss": 17.5485, "step": 24528 }, { "epoch": 0.44837040963679237, "grad_norm": 5.7587372362784865, "learning_rate": 6.070878714226691e-06, "loss": 17.1437, "step": 24529 }, { "epoch": 0.4483886888332389, "grad_norm": 7.974223587236758, "learning_rate": 6.07058956971764e-06, "loss": 17.9765, "step": 24530 }, { "epoch": 0.44840696802968544, "grad_norm": 5.616838625710783, "learning_rate": 6.070300421456264e-06, "loss": 17.3922, "step": 24531 }, { "epoch": 0.4484252472261319, "grad_norm": 6.365724697268386, "learning_rate": 6.070011269443581e-06, "loss": 17.5502, "step": 24532 }, { "epoch": 0.44844352642257845, "grad_norm": 5.381085605556312, "learning_rate": 6.0697221136805975e-06, "loss": 17.1453, "step": 24533 }, { "epoch": 0.448461805619025, "grad_norm": 7.3249294609347375, "learning_rate": 6.069432954168333e-06, "loss": 17.647, "step": 24534 }, { "epoch": 0.4484800848154715, "grad_norm": 6.86413334020217, "learning_rate": 6.069143790907799e-06, "loss": 17.7492, "step": 24535 }, { "epoch": 0.44849836401191806, "grad_norm": 6.857971680791453, "learning_rate": 6.068854623900008e-06, "loss": 17.7351, "step": 24536 }, { "epoch": 0.44851664320836454, "grad_norm": 6.371194485979414, "learning_rate": 6.068565453145975e-06, "loss": 17.5003, "step": 24537 }, { "epoch": 0.4485349224048111, "grad_norm": 7.202973564893784, "learning_rate": 6.068276278646711e-06, "loss": 17.9177, "step": 24538 }, { "epoch": 0.4485532016012576, "grad_norm": 6.680424328765258, "learning_rate": 6.067987100403233e-06, "loss": 17.7232, "step": 24539 }, { "epoch": 0.44857148079770415, "grad_norm": 6.039204159314783, "learning_rate": 6.067697918416553e-06, "loss": 17.2954, "step": 24540 }, { "epoch": 0.4485897599941507, "grad_norm": 5.480267338243896, "learning_rate": 6.067408732687684e-06, "loss": 17.2093, "step": 24541 }, { "epoch": 0.44860803919059716, "grad_norm": 7.426914394730756, "learning_rate": 6.06711954321764e-06, "loss": 17.863, "step": 24542 }, { "epoch": 0.4486263183870437, "grad_norm": 6.464402359165225, "learning_rate": 6.066830350007435e-06, "loss": 17.5623, "step": 24543 }, { "epoch": 0.44864459758349023, "grad_norm": 6.824830301997114, "learning_rate": 6.066541153058081e-06, "loss": 17.7391, "step": 24544 }, { "epoch": 0.44866287677993677, "grad_norm": 7.5960022816052595, "learning_rate": 6.066251952370594e-06, "loss": 18.0109, "step": 24545 }, { "epoch": 0.4486811559763833, "grad_norm": 5.759280762000812, "learning_rate": 6.0659627479459856e-06, "loss": 17.2945, "step": 24546 }, { "epoch": 0.4486994351728298, "grad_norm": 6.07739225476777, "learning_rate": 6.065673539785271e-06, "loss": 17.3572, "step": 24547 }, { "epoch": 0.4487177143692763, "grad_norm": 7.522781197636879, "learning_rate": 6.065384327889462e-06, "loss": 17.8289, "step": 24548 }, { "epoch": 0.44873599356572286, "grad_norm": 5.416111713084062, "learning_rate": 6.065095112259575e-06, "loss": 17.0497, "step": 24549 }, { "epoch": 0.4487542727621694, "grad_norm": 6.2261954311888505, "learning_rate": 6.064805892896621e-06, "loss": 17.4546, "step": 24550 }, { "epoch": 0.4487725519586159, "grad_norm": 6.698559746903378, "learning_rate": 6.0645166698016145e-06, "loss": 17.4948, "step": 24551 }, { "epoch": 0.4487908311550624, "grad_norm": 5.989076779748443, "learning_rate": 6.06422744297557e-06, "loss": 17.2271, "step": 24552 }, { "epoch": 0.44880911035150894, "grad_norm": 6.786639717361195, "learning_rate": 6.063938212419501e-06, "loss": 17.6503, "step": 24553 }, { "epoch": 0.4488273895479555, "grad_norm": 7.987285151832187, "learning_rate": 6.06364897813442e-06, "loss": 17.8774, "step": 24554 }, { "epoch": 0.448845668744402, "grad_norm": 7.534178762851669, "learning_rate": 6.063359740121342e-06, "loss": 18.2498, "step": 24555 }, { "epoch": 0.4488639479408485, "grad_norm": 7.818544921555647, "learning_rate": 6.063070498381281e-06, "loss": 18.225, "step": 24556 }, { "epoch": 0.44888222713729503, "grad_norm": 6.150402645822434, "learning_rate": 6.0627812529152496e-06, "loss": 17.2284, "step": 24557 }, { "epoch": 0.44890050633374157, "grad_norm": 6.452872580875381, "learning_rate": 6.062492003724262e-06, "loss": 17.5387, "step": 24558 }, { "epoch": 0.4489187855301881, "grad_norm": 5.609052918892873, "learning_rate": 6.0622027508093325e-06, "loss": 17.2485, "step": 24559 }, { "epoch": 0.44893706472663464, "grad_norm": 6.278336245370838, "learning_rate": 6.061913494171474e-06, "loss": 17.2655, "step": 24560 }, { "epoch": 0.4489553439230811, "grad_norm": 8.9152948236935, "learning_rate": 6.0616242338117005e-06, "loss": 18.1979, "step": 24561 }, { "epoch": 0.44897362311952765, "grad_norm": 6.088876698717248, "learning_rate": 6.0613349697310275e-06, "loss": 17.317, "step": 24562 }, { "epoch": 0.4489919023159742, "grad_norm": 5.977949774786967, "learning_rate": 6.061045701930468e-06, "loss": 17.5265, "step": 24563 }, { "epoch": 0.4490101815124207, "grad_norm": 6.291955535729224, "learning_rate": 6.060756430411033e-06, "loss": 17.6719, "step": 24564 }, { "epoch": 0.44902846070886726, "grad_norm": 5.83042788133474, "learning_rate": 6.060467155173739e-06, "loss": 17.2278, "step": 24565 }, { "epoch": 0.44904673990531374, "grad_norm": 9.914272324998482, "learning_rate": 6.0601778762196016e-06, "loss": 17.3401, "step": 24566 }, { "epoch": 0.4490650191017603, "grad_norm": 5.88376955223234, "learning_rate": 6.059888593549632e-06, "loss": 17.3663, "step": 24567 }, { "epoch": 0.4490832982982068, "grad_norm": 6.75817276328447, "learning_rate": 6.059599307164845e-06, "loss": 17.7442, "step": 24568 }, { "epoch": 0.44910157749465335, "grad_norm": 5.464164571011008, "learning_rate": 6.059310017066254e-06, "loss": 17.2119, "step": 24569 }, { "epoch": 0.4491198566910999, "grad_norm": 5.066354716612397, "learning_rate": 6.059020723254874e-06, "loss": 17.247, "step": 24570 }, { "epoch": 0.44913813588754636, "grad_norm": 6.02637680820274, "learning_rate": 6.058731425731716e-06, "loss": 17.4971, "step": 24571 }, { "epoch": 0.4491564150839929, "grad_norm": 6.5116680623856835, "learning_rate": 6.058442124497799e-06, "loss": 17.6569, "step": 24572 }, { "epoch": 0.44917469428043943, "grad_norm": 7.547813129000829, "learning_rate": 6.058152819554134e-06, "loss": 17.994, "step": 24573 }, { "epoch": 0.44919297347688597, "grad_norm": 5.78438643898109, "learning_rate": 6.057863510901733e-06, "loss": 17.3803, "step": 24574 }, { "epoch": 0.4492112526733325, "grad_norm": 6.207095392725289, "learning_rate": 6.057574198541614e-06, "loss": 17.4389, "step": 24575 }, { "epoch": 0.449229531869779, "grad_norm": 7.965840733566035, "learning_rate": 6.057284882474788e-06, "loss": 18.28, "step": 24576 }, { "epoch": 0.4492478110662255, "grad_norm": 5.2206629645349265, "learning_rate": 6.056995562702271e-06, "loss": 17.176, "step": 24577 }, { "epoch": 0.44926609026267206, "grad_norm": 6.88168788536299, "learning_rate": 6.056706239225076e-06, "loss": 18.1214, "step": 24578 }, { "epoch": 0.4492843694591186, "grad_norm": 6.376165550817576, "learning_rate": 6.056416912044217e-06, "loss": 17.6409, "step": 24579 }, { "epoch": 0.44930264865556513, "grad_norm": 7.4322640736029, "learning_rate": 6.0561275811607104e-06, "loss": 17.9083, "step": 24580 }, { "epoch": 0.4493209278520116, "grad_norm": 6.9442375255901405, "learning_rate": 6.055838246575566e-06, "loss": 17.6621, "step": 24581 }, { "epoch": 0.44933920704845814, "grad_norm": 5.6862610391781265, "learning_rate": 6.055548908289801e-06, "loss": 17.0563, "step": 24582 }, { "epoch": 0.4493574862449047, "grad_norm": 6.955649417451342, "learning_rate": 6.055259566304429e-06, "loss": 17.5888, "step": 24583 }, { "epoch": 0.4493757654413512, "grad_norm": 6.470692818883817, "learning_rate": 6.054970220620463e-06, "loss": 17.3649, "step": 24584 }, { "epoch": 0.4493940446377977, "grad_norm": 9.814720207131511, "learning_rate": 6.054680871238918e-06, "loss": 18.5805, "step": 24585 }, { "epoch": 0.44941232383424423, "grad_norm": 6.234637690888976, "learning_rate": 6.054391518160808e-06, "loss": 17.7208, "step": 24586 }, { "epoch": 0.44943060303069077, "grad_norm": 5.97628720273616, "learning_rate": 6.054102161387147e-06, "loss": 17.4372, "step": 24587 }, { "epoch": 0.4494488822271373, "grad_norm": 7.366581984919246, "learning_rate": 6.053812800918951e-06, "loss": 17.609, "step": 24588 }, { "epoch": 0.44946716142358384, "grad_norm": 6.589630624354999, "learning_rate": 6.053523436757232e-06, "loss": 17.6204, "step": 24589 }, { "epoch": 0.4494854406200303, "grad_norm": 6.370969219120875, "learning_rate": 6.053234068903004e-06, "loss": 17.3953, "step": 24590 }, { "epoch": 0.44950371981647685, "grad_norm": 5.747775020938252, "learning_rate": 6.052944697357283e-06, "loss": 17.2095, "step": 24591 }, { "epoch": 0.4495219990129234, "grad_norm": 5.984339777999843, "learning_rate": 6.052655322121081e-06, "loss": 17.479, "step": 24592 }, { "epoch": 0.4495402782093699, "grad_norm": 6.84481552137433, "learning_rate": 6.052365943195413e-06, "loss": 17.8045, "step": 24593 }, { "epoch": 0.44955855740581646, "grad_norm": 7.8522367904267405, "learning_rate": 6.0520765605812956e-06, "loss": 18.0193, "step": 24594 }, { "epoch": 0.44957683660226294, "grad_norm": 5.309549928513325, "learning_rate": 6.051787174279741e-06, "loss": 17.0146, "step": 24595 }, { "epoch": 0.4495951157987095, "grad_norm": 5.985642368716371, "learning_rate": 6.051497784291762e-06, "loss": 17.5111, "step": 24596 }, { "epoch": 0.449613394995156, "grad_norm": 6.398311933102849, "learning_rate": 6.051208390618375e-06, "loss": 17.4885, "step": 24597 }, { "epoch": 0.44963167419160255, "grad_norm": 6.720363386418574, "learning_rate": 6.050918993260595e-06, "loss": 17.4864, "step": 24598 }, { "epoch": 0.4496499533880491, "grad_norm": 6.057645664109545, "learning_rate": 6.050629592219434e-06, "loss": 17.5053, "step": 24599 }, { "epoch": 0.44966823258449556, "grad_norm": 6.683413351050948, "learning_rate": 6.050340187495908e-06, "loss": 17.4866, "step": 24600 }, { "epoch": 0.4496865117809421, "grad_norm": 6.5973144254902, "learning_rate": 6.05005077909103e-06, "loss": 17.6612, "step": 24601 }, { "epoch": 0.44970479097738864, "grad_norm": 5.997397751800434, "learning_rate": 6.049761367005815e-06, "loss": 17.5349, "step": 24602 }, { "epoch": 0.44972307017383517, "grad_norm": 6.240040430748273, "learning_rate": 6.049471951241279e-06, "loss": 17.4376, "step": 24603 }, { "epoch": 0.4497413493702817, "grad_norm": 5.208798792321488, "learning_rate": 6.049182531798434e-06, "loss": 17.0191, "step": 24604 }, { "epoch": 0.4497596285667282, "grad_norm": 5.919284012950762, "learning_rate": 6.048893108678295e-06, "loss": 17.2049, "step": 24605 }, { "epoch": 0.4497779077631747, "grad_norm": 6.7964267950890385, "learning_rate": 6.0486036818818775e-06, "loss": 17.5385, "step": 24606 }, { "epoch": 0.44979618695962126, "grad_norm": 7.811179831176289, "learning_rate": 6.048314251410193e-06, "loss": 17.5783, "step": 24607 }, { "epoch": 0.4498144661560678, "grad_norm": 6.408143347933946, "learning_rate": 6.048024817264261e-06, "loss": 17.7159, "step": 24608 }, { "epoch": 0.44983274535251433, "grad_norm": 7.157801644792308, "learning_rate": 6.047735379445092e-06, "loss": 17.6406, "step": 24609 }, { "epoch": 0.4498510245489608, "grad_norm": 6.714112451459579, "learning_rate": 6.047445937953701e-06, "loss": 17.6858, "step": 24610 }, { "epoch": 0.44986930374540735, "grad_norm": 5.638619957228446, "learning_rate": 6.047156492791102e-06, "loss": 17.0898, "step": 24611 }, { "epoch": 0.4498875829418539, "grad_norm": 6.660373547952849, "learning_rate": 6.046867043958311e-06, "loss": 18.0357, "step": 24612 }, { "epoch": 0.4499058621383004, "grad_norm": 5.914512935480703, "learning_rate": 6.046577591456343e-06, "loss": 17.0058, "step": 24613 }, { "epoch": 0.44992414133474695, "grad_norm": 6.288417485868926, "learning_rate": 6.0462881352862115e-06, "loss": 17.5398, "step": 24614 }, { "epoch": 0.44994242053119343, "grad_norm": 5.8650711695290525, "learning_rate": 6.045998675448927e-06, "loss": 17.2077, "step": 24615 }, { "epoch": 0.44996069972763997, "grad_norm": 5.990436894644584, "learning_rate": 6.045709211945512e-06, "loss": 17.223, "step": 24616 }, { "epoch": 0.4499789789240865, "grad_norm": 7.495889648121055, "learning_rate": 6.045419744776976e-06, "loss": 18.0186, "step": 24617 }, { "epoch": 0.44999725812053304, "grad_norm": 8.213885327731585, "learning_rate": 6.045130273944334e-06, "loss": 18.2507, "step": 24618 }, { "epoch": 0.4500155373169795, "grad_norm": 6.141209230567598, "learning_rate": 6.044840799448602e-06, "loss": 17.2438, "step": 24619 }, { "epoch": 0.45003381651342605, "grad_norm": 5.854929778397421, "learning_rate": 6.044551321290791e-06, "loss": 17.3367, "step": 24620 }, { "epoch": 0.4500520957098726, "grad_norm": 8.045059826317729, "learning_rate": 6.044261839471921e-06, "loss": 17.5542, "step": 24621 }, { "epoch": 0.4500703749063191, "grad_norm": 5.220343005622515, "learning_rate": 6.043972353993004e-06, "loss": 17.014, "step": 24622 }, { "epoch": 0.45008865410276566, "grad_norm": 5.407247716345242, "learning_rate": 6.043682864855053e-06, "loss": 17.0192, "step": 24623 }, { "epoch": 0.45010693329921214, "grad_norm": 6.97528893326854, "learning_rate": 6.0433933720590845e-06, "loss": 17.7547, "step": 24624 }, { "epoch": 0.4501252124956587, "grad_norm": 6.01521567472642, "learning_rate": 6.0431038756061135e-06, "loss": 17.1485, "step": 24625 }, { "epoch": 0.4501434916921052, "grad_norm": 5.651430175005524, "learning_rate": 6.0428143754971526e-06, "loss": 17.0379, "step": 24626 }, { "epoch": 0.45016177088855175, "grad_norm": 6.778124048190056, "learning_rate": 6.042524871733218e-06, "loss": 17.4037, "step": 24627 }, { "epoch": 0.4501800500849983, "grad_norm": 5.674727814042923, "learning_rate": 6.042235364315325e-06, "loss": 17.1581, "step": 24628 }, { "epoch": 0.45019832928144476, "grad_norm": 6.423002753043599, "learning_rate": 6.0419458532444875e-06, "loss": 17.5022, "step": 24629 }, { "epoch": 0.4502166084778913, "grad_norm": 7.302713204386245, "learning_rate": 6.04165633852172e-06, "loss": 17.8249, "step": 24630 }, { "epoch": 0.45023488767433784, "grad_norm": 7.458820722608766, "learning_rate": 6.041366820148037e-06, "loss": 18.0166, "step": 24631 }, { "epoch": 0.45025316687078437, "grad_norm": 6.194179028173427, "learning_rate": 6.0410772981244555e-06, "loss": 17.6362, "step": 24632 }, { "epoch": 0.4502714460672309, "grad_norm": 7.4911200748010245, "learning_rate": 6.040787772451986e-06, "loss": 17.8867, "step": 24633 }, { "epoch": 0.4502897252636774, "grad_norm": 6.057510729245927, "learning_rate": 6.040498243131646e-06, "loss": 17.2536, "step": 24634 }, { "epoch": 0.4503080044601239, "grad_norm": 8.547528988042247, "learning_rate": 6.040208710164451e-06, "loss": 17.9104, "step": 24635 }, { "epoch": 0.45032628365657046, "grad_norm": 6.650799585296418, "learning_rate": 6.0399191735514154e-06, "loss": 17.4979, "step": 24636 }, { "epoch": 0.450344562853017, "grad_norm": 5.931705650138957, "learning_rate": 6.039629633293552e-06, "loss": 17.0645, "step": 24637 }, { "epoch": 0.45036284204946353, "grad_norm": 6.823833292013225, "learning_rate": 6.039340089391876e-06, "loss": 17.4026, "step": 24638 }, { "epoch": 0.45038112124591, "grad_norm": 6.347875714287686, "learning_rate": 6.039050541847405e-06, "loss": 17.552, "step": 24639 }, { "epoch": 0.45039940044235655, "grad_norm": 6.138020966787515, "learning_rate": 6.038760990661151e-06, "loss": 17.2043, "step": 24640 }, { "epoch": 0.4504176796388031, "grad_norm": 6.06862788956139, "learning_rate": 6.03847143583413e-06, "loss": 17.286, "step": 24641 }, { "epoch": 0.4504359588352496, "grad_norm": 6.661539874719218, "learning_rate": 6.038181877367358e-06, "loss": 17.328, "step": 24642 }, { "epoch": 0.45045423803169615, "grad_norm": 6.225700155227202, "learning_rate": 6.037892315261847e-06, "loss": 17.5815, "step": 24643 }, { "epoch": 0.45047251722814263, "grad_norm": 6.21853811677487, "learning_rate": 6.037602749518614e-06, "loss": 17.4411, "step": 24644 }, { "epoch": 0.45049079642458917, "grad_norm": 6.902656552265081, "learning_rate": 6.0373131801386734e-06, "loss": 17.6042, "step": 24645 }, { "epoch": 0.4505090756210357, "grad_norm": 7.846094296385334, "learning_rate": 6.0370236071230414e-06, "loss": 17.962, "step": 24646 }, { "epoch": 0.45052735481748224, "grad_norm": 5.717781758447652, "learning_rate": 6.036734030472729e-06, "loss": 17.1185, "step": 24647 }, { "epoch": 0.4505456340139288, "grad_norm": 4.763707573529443, "learning_rate": 6.036444450188755e-06, "loss": 16.8422, "step": 24648 }, { "epoch": 0.45056391321037526, "grad_norm": 6.732233461299591, "learning_rate": 6.036154866272135e-06, "loss": 17.6115, "step": 24649 }, { "epoch": 0.4505821924068218, "grad_norm": 5.251545990872237, "learning_rate": 6.03586527872388e-06, "loss": 17.0011, "step": 24650 }, { "epoch": 0.4506004716032683, "grad_norm": 6.707542053326857, "learning_rate": 6.035575687545008e-06, "loss": 17.874, "step": 24651 }, { "epoch": 0.45061875079971486, "grad_norm": 5.854115273020799, "learning_rate": 6.035286092736532e-06, "loss": 17.3612, "step": 24652 }, { "epoch": 0.45063702999616134, "grad_norm": 6.948975364826289, "learning_rate": 6.0349964942994685e-06, "loss": 17.805, "step": 24653 }, { "epoch": 0.4506553091926079, "grad_norm": 6.999709866000698, "learning_rate": 6.034706892234833e-06, "loss": 17.9041, "step": 24654 }, { "epoch": 0.4506735883890544, "grad_norm": 7.182306356613054, "learning_rate": 6.034417286543639e-06, "loss": 17.9241, "step": 24655 }, { "epoch": 0.45069186758550095, "grad_norm": 6.594254229495999, "learning_rate": 6.034127677226902e-06, "loss": 17.443, "step": 24656 }, { "epoch": 0.4507101467819475, "grad_norm": 5.478485975266605, "learning_rate": 6.033838064285638e-06, "loss": 17.2909, "step": 24657 }, { "epoch": 0.45072842597839397, "grad_norm": 6.41011908912437, "learning_rate": 6.03354844772086e-06, "loss": 17.5707, "step": 24658 }, { "epoch": 0.4507467051748405, "grad_norm": 8.414537097297394, "learning_rate": 6.033258827533586e-06, "loss": 18.035, "step": 24659 }, { "epoch": 0.45076498437128704, "grad_norm": 7.001185991744438, "learning_rate": 6.032969203724828e-06, "loss": 17.7875, "step": 24660 }, { "epoch": 0.45078326356773357, "grad_norm": 6.089805242115694, "learning_rate": 6.032679576295603e-06, "loss": 17.1514, "step": 24661 }, { "epoch": 0.4508015427641801, "grad_norm": 5.927690599311975, "learning_rate": 6.032389945246925e-06, "loss": 17.3021, "step": 24662 }, { "epoch": 0.4508198219606266, "grad_norm": 6.095567075875497, "learning_rate": 6.032100310579812e-06, "loss": 17.4224, "step": 24663 }, { "epoch": 0.4508381011570731, "grad_norm": 4.9729292296281935, "learning_rate": 6.031810672295275e-06, "loss": 16.9422, "step": 24664 }, { "epoch": 0.45085638035351966, "grad_norm": 7.196499713694455, "learning_rate": 6.03152103039433e-06, "loss": 18.2755, "step": 24665 }, { "epoch": 0.4508746595499662, "grad_norm": 6.45490234704856, "learning_rate": 6.0312313848779965e-06, "loss": 17.451, "step": 24666 }, { "epoch": 0.45089293874641273, "grad_norm": 6.236169856996404, "learning_rate": 6.030941735747285e-06, "loss": 17.2154, "step": 24667 }, { "epoch": 0.4509112179428592, "grad_norm": 6.194245727873289, "learning_rate": 6.0306520830032124e-06, "loss": 17.4885, "step": 24668 }, { "epoch": 0.45092949713930575, "grad_norm": 6.4940184288310565, "learning_rate": 6.030362426646793e-06, "loss": 17.4401, "step": 24669 }, { "epoch": 0.4509477763357523, "grad_norm": 6.039570888625534, "learning_rate": 6.030072766679044e-06, "loss": 17.329, "step": 24670 }, { "epoch": 0.4509660555321988, "grad_norm": 7.073103273252511, "learning_rate": 6.029783103100978e-06, "loss": 17.642, "step": 24671 }, { "epoch": 0.45098433472864535, "grad_norm": 8.236292636508232, "learning_rate": 6.029493435913611e-06, "loss": 18.2369, "step": 24672 }, { "epoch": 0.45100261392509183, "grad_norm": 7.841646819805741, "learning_rate": 6.029203765117961e-06, "loss": 17.8693, "step": 24673 }, { "epoch": 0.45102089312153837, "grad_norm": 5.856147196076327, "learning_rate": 6.02891409071504e-06, "loss": 17.5565, "step": 24674 }, { "epoch": 0.4510391723179849, "grad_norm": 7.68190603764835, "learning_rate": 6.028624412705863e-06, "loss": 17.8626, "step": 24675 }, { "epoch": 0.45105745151443144, "grad_norm": 5.343253256839253, "learning_rate": 6.0283347310914485e-06, "loss": 17.0762, "step": 24676 }, { "epoch": 0.451075730710878, "grad_norm": 5.886742429105178, "learning_rate": 6.028045045872811e-06, "loss": 17.1906, "step": 24677 }, { "epoch": 0.45109400990732446, "grad_norm": 7.367978120929744, "learning_rate": 6.027755357050964e-06, "loss": 17.5718, "step": 24678 }, { "epoch": 0.451112289103771, "grad_norm": 7.035319286772534, "learning_rate": 6.0274656646269215e-06, "loss": 17.4689, "step": 24679 }, { "epoch": 0.4511305683002175, "grad_norm": 5.245110714669578, "learning_rate": 6.027175968601704e-06, "loss": 16.8598, "step": 24680 }, { "epoch": 0.45114884749666406, "grad_norm": 7.052759396033848, "learning_rate": 6.026886268976322e-06, "loss": 17.6692, "step": 24681 }, { "epoch": 0.4511671266931106, "grad_norm": 6.680488552848285, "learning_rate": 6.026596565751794e-06, "loss": 17.6147, "step": 24682 }, { "epoch": 0.4511854058895571, "grad_norm": 8.496915746510703, "learning_rate": 6.026306858929133e-06, "loss": 18.4555, "step": 24683 }, { "epoch": 0.4512036850860036, "grad_norm": 6.630456798807599, "learning_rate": 6.026017148509355e-06, "loss": 17.6023, "step": 24684 }, { "epoch": 0.45122196428245015, "grad_norm": 8.436450617612355, "learning_rate": 6.025727434493477e-06, "loss": 17.9402, "step": 24685 }, { "epoch": 0.4512402434788967, "grad_norm": 6.927448939750945, "learning_rate": 6.025437716882513e-06, "loss": 17.8359, "step": 24686 }, { "epoch": 0.45125852267534317, "grad_norm": 6.830529479138461, "learning_rate": 6.02514799567748e-06, "loss": 17.8449, "step": 24687 }, { "epoch": 0.4512768018717897, "grad_norm": 4.461113871469783, "learning_rate": 6.02485827087939e-06, "loss": 16.8073, "step": 24688 }, { "epoch": 0.45129508106823624, "grad_norm": 6.770820637432052, "learning_rate": 6.024568542489262e-06, "loss": 17.6159, "step": 24689 }, { "epoch": 0.4513133602646828, "grad_norm": 6.644486299362443, "learning_rate": 6.0242788105081106e-06, "loss": 17.5468, "step": 24690 }, { "epoch": 0.4513316394611293, "grad_norm": 7.095053551366182, "learning_rate": 6.023989074936951e-06, "loss": 17.7514, "step": 24691 }, { "epoch": 0.4513499186575758, "grad_norm": 5.998084138138065, "learning_rate": 6.0236993357767955e-06, "loss": 17.3063, "step": 24692 }, { "epoch": 0.4513681978540223, "grad_norm": 7.585781276479587, "learning_rate": 6.023409593028666e-06, "loss": 18.1675, "step": 24693 }, { "epoch": 0.45138647705046886, "grad_norm": 6.2665781415529365, "learning_rate": 6.0231198466935745e-06, "loss": 17.5643, "step": 24694 }, { "epoch": 0.4514047562469154, "grad_norm": 7.750558703464514, "learning_rate": 6.0228300967725365e-06, "loss": 17.747, "step": 24695 }, { "epoch": 0.45142303544336193, "grad_norm": 7.602132480396472, "learning_rate": 6.022540343266566e-06, "loss": 17.7959, "step": 24696 }, { "epoch": 0.4514413146398084, "grad_norm": 7.7349738302042566, "learning_rate": 6.022250586176683e-06, "loss": 17.7687, "step": 24697 }, { "epoch": 0.45145959383625495, "grad_norm": 6.355945855864465, "learning_rate": 6.021960825503897e-06, "loss": 17.4123, "step": 24698 }, { "epoch": 0.4514778730327015, "grad_norm": 6.6578356518514, "learning_rate": 6.021671061249229e-06, "loss": 17.4073, "step": 24699 }, { "epoch": 0.451496152229148, "grad_norm": 5.713505249244532, "learning_rate": 6.021381293413693e-06, "loss": 17.1641, "step": 24700 }, { "epoch": 0.45151443142559455, "grad_norm": 5.624758350692372, "learning_rate": 6.021091521998304e-06, "loss": 17.1406, "step": 24701 }, { "epoch": 0.45153271062204103, "grad_norm": 5.948590414234612, "learning_rate": 6.020801747004077e-06, "loss": 17.2232, "step": 24702 }, { "epoch": 0.45155098981848757, "grad_norm": 6.155157389616313, "learning_rate": 6.020511968432029e-06, "loss": 17.4346, "step": 24703 }, { "epoch": 0.4515692690149341, "grad_norm": 5.474975628502764, "learning_rate": 6.020222186283175e-06, "loss": 17.1878, "step": 24704 }, { "epoch": 0.45158754821138064, "grad_norm": 5.084933908634606, "learning_rate": 6.019932400558531e-06, "loss": 17.007, "step": 24705 }, { "epoch": 0.4516058274078272, "grad_norm": 5.23240276581489, "learning_rate": 6.019642611259111e-06, "loss": 17.048, "step": 24706 }, { "epoch": 0.45162410660427366, "grad_norm": 6.451885620662344, "learning_rate": 6.019352818385934e-06, "loss": 17.3354, "step": 24707 }, { "epoch": 0.4516423858007202, "grad_norm": 8.407424004543389, "learning_rate": 6.019063021940014e-06, "loss": 17.9548, "step": 24708 }, { "epoch": 0.4516606649971667, "grad_norm": 5.788778329765934, "learning_rate": 6.018773221922366e-06, "loss": 17.2667, "step": 24709 }, { "epoch": 0.45167894419361326, "grad_norm": 6.064469964372707, "learning_rate": 6.018483418334006e-06, "loss": 17.3951, "step": 24710 }, { "epoch": 0.4516972233900598, "grad_norm": 8.66837895395117, "learning_rate": 6.01819361117595e-06, "loss": 18.1703, "step": 24711 }, { "epoch": 0.4517155025865063, "grad_norm": 6.830167778453176, "learning_rate": 6.0179038004492144e-06, "loss": 17.4709, "step": 24712 }, { "epoch": 0.4517337817829528, "grad_norm": 6.965211136666187, "learning_rate": 6.017613986154813e-06, "loss": 17.6896, "step": 24713 }, { "epoch": 0.45175206097939935, "grad_norm": 6.262331316183803, "learning_rate": 6.017324168293763e-06, "loss": 17.2691, "step": 24714 }, { "epoch": 0.4517703401758459, "grad_norm": 8.443516784987569, "learning_rate": 6.017034346867081e-06, "loss": 17.7252, "step": 24715 }, { "epoch": 0.4517886193722924, "grad_norm": 6.064474784741955, "learning_rate": 6.016744521875782e-06, "loss": 17.6041, "step": 24716 }, { "epoch": 0.4518068985687389, "grad_norm": 7.668158594365066, "learning_rate": 6.01645469332088e-06, "loss": 18.2217, "step": 24717 }, { "epoch": 0.45182517776518544, "grad_norm": 6.322105330152482, "learning_rate": 6.016164861203395e-06, "loss": 17.5224, "step": 24718 }, { "epoch": 0.451843456961632, "grad_norm": 7.438910802051069, "learning_rate": 6.015875025524338e-06, "loss": 18.0336, "step": 24719 }, { "epoch": 0.4518617361580785, "grad_norm": 6.704452739777389, "learning_rate": 6.015585186284728e-06, "loss": 17.4795, "step": 24720 }, { "epoch": 0.451880015354525, "grad_norm": 6.718555993230533, "learning_rate": 6.015295343485581e-06, "loss": 17.8028, "step": 24721 }, { "epoch": 0.4518982945509715, "grad_norm": 5.925313793829689, "learning_rate": 6.015005497127911e-06, "loss": 17.2797, "step": 24722 }, { "epoch": 0.45191657374741806, "grad_norm": 6.746912088623491, "learning_rate": 6.014715647212736e-06, "loss": 17.4472, "step": 24723 }, { "epoch": 0.4519348529438646, "grad_norm": 6.928342340989377, "learning_rate": 6.014425793741068e-06, "loss": 17.8301, "step": 24724 }, { "epoch": 0.45195313214031113, "grad_norm": 6.042923310933138, "learning_rate": 6.014135936713928e-06, "loss": 17.3002, "step": 24725 }, { "epoch": 0.4519714113367576, "grad_norm": 8.578425695372713, "learning_rate": 6.013846076132329e-06, "loss": 18.3965, "step": 24726 }, { "epoch": 0.45198969053320415, "grad_norm": 6.111551031387784, "learning_rate": 6.013556211997286e-06, "loss": 17.0901, "step": 24727 }, { "epoch": 0.4520079697296507, "grad_norm": 5.5082290783231045, "learning_rate": 6.01326634430982e-06, "loss": 17.04, "step": 24728 }, { "epoch": 0.4520262489260972, "grad_norm": 4.90541389879834, "learning_rate": 6.01297647307094e-06, "loss": 17.063, "step": 24729 }, { "epoch": 0.45204452812254375, "grad_norm": 5.598993418730989, "learning_rate": 6.012686598281666e-06, "loss": 17.3291, "step": 24730 }, { "epoch": 0.45206280731899023, "grad_norm": 7.057060538631161, "learning_rate": 6.012396719943014e-06, "loss": 17.4468, "step": 24731 }, { "epoch": 0.45208108651543677, "grad_norm": 6.644628032127668, "learning_rate": 6.012106838056001e-06, "loss": 17.9096, "step": 24732 }, { "epoch": 0.4520993657118833, "grad_norm": 6.235272183127271, "learning_rate": 6.011816952621639e-06, "loss": 17.4477, "step": 24733 }, { "epoch": 0.45211764490832984, "grad_norm": 7.395452813459593, "learning_rate": 6.011527063640946e-06, "loss": 18.0175, "step": 24734 }, { "epoch": 0.4521359241047764, "grad_norm": 6.41941944832454, "learning_rate": 6.011237171114941e-06, "loss": 17.355, "step": 24735 }, { "epoch": 0.45215420330122286, "grad_norm": 6.074373312847202, "learning_rate": 6.010947275044635e-06, "loss": 17.3462, "step": 24736 }, { "epoch": 0.4521724824976694, "grad_norm": 7.89500808482335, "learning_rate": 6.010657375431047e-06, "loss": 18.4106, "step": 24737 }, { "epoch": 0.45219076169411593, "grad_norm": 7.327090196334108, "learning_rate": 6.010367472275192e-06, "loss": 17.7476, "step": 24738 }, { "epoch": 0.45220904089056246, "grad_norm": 5.418767125407057, "learning_rate": 6.010077565578088e-06, "loss": 17.0018, "step": 24739 }, { "epoch": 0.452227320087009, "grad_norm": 5.740834123665435, "learning_rate": 6.009787655340751e-06, "loss": 17.1866, "step": 24740 }, { "epoch": 0.4522455992834555, "grad_norm": 5.682718616324278, "learning_rate": 6.009497741564194e-06, "loss": 17.313, "step": 24741 }, { "epoch": 0.452263878479902, "grad_norm": 5.373888396178508, "learning_rate": 6.009207824249435e-06, "loss": 17.0231, "step": 24742 }, { "epoch": 0.45228215767634855, "grad_norm": 6.381700126000964, "learning_rate": 6.008917903397491e-06, "loss": 17.6324, "step": 24743 }, { "epoch": 0.4523004368727951, "grad_norm": 5.812652938728273, "learning_rate": 6.008627979009376e-06, "loss": 17.3316, "step": 24744 }, { "epoch": 0.4523187160692416, "grad_norm": 8.068093935386853, "learning_rate": 6.008338051086109e-06, "loss": 18.2421, "step": 24745 }, { "epoch": 0.4523369952656881, "grad_norm": 6.458507344262246, "learning_rate": 6.008048119628705e-06, "loss": 17.3915, "step": 24746 }, { "epoch": 0.45235527446213464, "grad_norm": 6.150373670861808, "learning_rate": 6.007758184638177e-06, "loss": 17.2885, "step": 24747 }, { "epoch": 0.4523735536585812, "grad_norm": 5.755699726914589, "learning_rate": 6.007468246115545e-06, "loss": 17.2085, "step": 24748 }, { "epoch": 0.4523918328550277, "grad_norm": 7.056788174499082, "learning_rate": 6.007178304061827e-06, "loss": 17.987, "step": 24749 }, { "epoch": 0.45241011205147424, "grad_norm": 6.359862268820703, "learning_rate": 6.0068883584780336e-06, "loss": 17.4381, "step": 24750 }, { "epoch": 0.4524283912479207, "grad_norm": 6.187298269703934, "learning_rate": 6.006598409365185e-06, "loss": 17.451, "step": 24751 }, { "epoch": 0.45244667044436726, "grad_norm": 5.255808732439555, "learning_rate": 6.006308456724296e-06, "loss": 17.0127, "step": 24752 }, { "epoch": 0.4524649496408138, "grad_norm": 5.954886878936499, "learning_rate": 6.006018500556383e-06, "loss": 17.5082, "step": 24753 }, { "epoch": 0.45248322883726033, "grad_norm": 7.1927315341034905, "learning_rate": 6.005728540862462e-06, "loss": 17.5332, "step": 24754 }, { "epoch": 0.4525015080337068, "grad_norm": 7.240682473726163, "learning_rate": 6.005438577643551e-06, "loss": 17.4798, "step": 24755 }, { "epoch": 0.45251978723015335, "grad_norm": 7.584413677100589, "learning_rate": 6.005148610900664e-06, "loss": 18.0481, "step": 24756 }, { "epoch": 0.4525380664265999, "grad_norm": 5.44395975670243, "learning_rate": 6.004858640634819e-06, "loss": 16.9502, "step": 24757 }, { "epoch": 0.4525563456230464, "grad_norm": 6.467104458156888, "learning_rate": 6.00456866684703e-06, "loss": 17.4797, "step": 24758 }, { "epoch": 0.45257462481949295, "grad_norm": 6.479292435086554, "learning_rate": 6.004278689538319e-06, "loss": 17.5469, "step": 24759 }, { "epoch": 0.45259290401593943, "grad_norm": 6.200094013034941, "learning_rate": 6.003988708709694e-06, "loss": 17.6831, "step": 24760 }, { "epoch": 0.45261118321238597, "grad_norm": 6.3042271961249305, "learning_rate": 6.003698724362177e-06, "loss": 17.4735, "step": 24761 }, { "epoch": 0.4526294624088325, "grad_norm": 6.796098419122103, "learning_rate": 6.003408736496784e-06, "loss": 17.4488, "step": 24762 }, { "epoch": 0.45264774160527904, "grad_norm": 6.787563180152649, "learning_rate": 6.0031187451145314e-06, "loss": 17.9, "step": 24763 }, { "epoch": 0.4526660208017256, "grad_norm": 5.483334221140463, "learning_rate": 6.002828750216433e-06, "loss": 17.3169, "step": 24764 }, { "epoch": 0.45268429999817206, "grad_norm": 5.447648707127053, "learning_rate": 6.002538751803505e-06, "loss": 17.0144, "step": 24765 }, { "epoch": 0.4527025791946186, "grad_norm": 7.920318139622635, "learning_rate": 6.002248749876769e-06, "loss": 17.6761, "step": 24766 }, { "epoch": 0.45272085839106513, "grad_norm": 6.695960442495997, "learning_rate": 6.001958744437237e-06, "loss": 17.4054, "step": 24767 }, { "epoch": 0.45273913758751166, "grad_norm": 5.701863915732816, "learning_rate": 6.001668735485926e-06, "loss": 17.2778, "step": 24768 }, { "epoch": 0.4527574167839582, "grad_norm": 6.932504052318536, "learning_rate": 6.001378723023854e-06, "loss": 17.5723, "step": 24769 }, { "epoch": 0.4527756959804047, "grad_norm": 7.430116870954375, "learning_rate": 6.001088707052035e-06, "loss": 17.4729, "step": 24770 }, { "epoch": 0.4527939751768512, "grad_norm": 5.385734016428016, "learning_rate": 6.000798687571487e-06, "loss": 17.1351, "step": 24771 }, { "epoch": 0.45281225437329775, "grad_norm": 5.242606336630852, "learning_rate": 6.0005086645832276e-06, "loss": 16.9974, "step": 24772 }, { "epoch": 0.4528305335697443, "grad_norm": 6.5685110889576475, "learning_rate": 6.000218638088273e-06, "loss": 17.6213, "step": 24773 }, { "epoch": 0.4528488127661908, "grad_norm": 5.716236414320557, "learning_rate": 5.999928608087637e-06, "loss": 17.3367, "step": 24774 }, { "epoch": 0.4528670919626373, "grad_norm": 8.564552218972441, "learning_rate": 5.999638574582338e-06, "loss": 18.2329, "step": 24775 }, { "epoch": 0.45288537115908384, "grad_norm": 7.301717290044244, "learning_rate": 5.999348537573394e-06, "loss": 17.4011, "step": 24776 }, { "epoch": 0.4529036503555304, "grad_norm": 6.38934496240644, "learning_rate": 5.99905849706182e-06, "loss": 17.6334, "step": 24777 }, { "epoch": 0.4529219295519769, "grad_norm": 5.2947761883209905, "learning_rate": 5.998768453048632e-06, "loss": 17.122, "step": 24778 }, { "epoch": 0.45294020874842345, "grad_norm": 8.074965858889485, "learning_rate": 5.998478405534845e-06, "loss": 17.8879, "step": 24779 }, { "epoch": 0.4529584879448699, "grad_norm": 6.699026954328017, "learning_rate": 5.998188354521481e-06, "loss": 17.7634, "step": 24780 }, { "epoch": 0.45297676714131646, "grad_norm": 6.182307564133946, "learning_rate": 5.997898300009554e-06, "loss": 17.4829, "step": 24781 }, { "epoch": 0.452995046337763, "grad_norm": 6.368049414384359, "learning_rate": 5.997608242000078e-06, "loss": 17.3981, "step": 24782 }, { "epoch": 0.45301332553420953, "grad_norm": 6.191705584338558, "learning_rate": 5.997318180494071e-06, "loss": 17.2676, "step": 24783 }, { "epoch": 0.45303160473065607, "grad_norm": 5.93275458193394, "learning_rate": 5.997028115492552e-06, "loss": 17.4764, "step": 24784 }, { "epoch": 0.45304988392710255, "grad_norm": 4.5247068484507444, "learning_rate": 5.996738046996535e-06, "loss": 16.7954, "step": 24785 }, { "epoch": 0.4530681631235491, "grad_norm": 7.902685526078171, "learning_rate": 5.99644797500704e-06, "loss": 17.3129, "step": 24786 }, { "epoch": 0.4530864423199956, "grad_norm": 6.06325082023824, "learning_rate": 5.996157899525078e-06, "loss": 17.2844, "step": 24787 }, { "epoch": 0.45310472151644215, "grad_norm": 7.628112177345854, "learning_rate": 5.995867820551671e-06, "loss": 17.9469, "step": 24788 }, { "epoch": 0.45312300071288864, "grad_norm": 7.873043759885314, "learning_rate": 5.995577738087832e-06, "loss": 17.643, "step": 24789 }, { "epoch": 0.45314127990933517, "grad_norm": 6.797267503432025, "learning_rate": 5.995287652134583e-06, "loss": 17.6177, "step": 24790 }, { "epoch": 0.4531595591057817, "grad_norm": 6.406551669096651, "learning_rate": 5.994997562692934e-06, "loss": 17.3904, "step": 24791 }, { "epoch": 0.45317783830222824, "grad_norm": 5.9040284706172335, "learning_rate": 5.994707469763904e-06, "loss": 17.4223, "step": 24792 }, { "epoch": 0.4531961174986748, "grad_norm": 7.130237826587623, "learning_rate": 5.9944173733485125e-06, "loss": 17.6801, "step": 24793 }, { "epoch": 0.45321439669512126, "grad_norm": 6.8328317591233825, "learning_rate": 5.994127273447775e-06, "loss": 17.64, "step": 24794 }, { "epoch": 0.4532326758915678, "grad_norm": 8.590765023395658, "learning_rate": 5.993837170062708e-06, "loss": 18.3209, "step": 24795 }, { "epoch": 0.45325095508801433, "grad_norm": 6.8864481963997015, "learning_rate": 5.993547063194326e-06, "loss": 17.5819, "step": 24796 }, { "epoch": 0.45326923428446086, "grad_norm": 5.636764255944445, "learning_rate": 5.993256952843648e-06, "loss": 17.1017, "step": 24797 }, { "epoch": 0.4532875134809074, "grad_norm": 5.544907470650895, "learning_rate": 5.992966839011691e-06, "loss": 17.2787, "step": 24798 }, { "epoch": 0.4533057926773539, "grad_norm": 6.258601438049628, "learning_rate": 5.992676721699472e-06, "loss": 17.5772, "step": 24799 }, { "epoch": 0.4533240718738004, "grad_norm": 7.644932221665382, "learning_rate": 5.992386600908007e-06, "loss": 17.9644, "step": 24800 }, { "epoch": 0.45334235107024695, "grad_norm": 6.548807081424216, "learning_rate": 5.9920964766383114e-06, "loss": 17.5698, "step": 24801 }, { "epoch": 0.4533606302666935, "grad_norm": 6.928747559619665, "learning_rate": 5.991806348891406e-06, "loss": 17.5953, "step": 24802 }, { "epoch": 0.45337890946314, "grad_norm": 6.24419831583202, "learning_rate": 5.991516217668304e-06, "loss": 17.7199, "step": 24803 }, { "epoch": 0.4533971886595865, "grad_norm": 4.78474495283371, "learning_rate": 5.991226082970025e-06, "loss": 17.0338, "step": 24804 }, { "epoch": 0.45341546785603304, "grad_norm": 5.934566945253231, "learning_rate": 5.9909359447975845e-06, "loss": 17.2925, "step": 24805 }, { "epoch": 0.4534337470524796, "grad_norm": 6.538932399172217, "learning_rate": 5.990645803151998e-06, "loss": 17.7339, "step": 24806 }, { "epoch": 0.4534520262489261, "grad_norm": 7.162529326065977, "learning_rate": 5.990355658034285e-06, "loss": 17.5858, "step": 24807 }, { "epoch": 0.45347030544537265, "grad_norm": 7.854321443510832, "learning_rate": 5.990065509445462e-06, "loss": 18.0197, "step": 24808 }, { "epoch": 0.4534885846418191, "grad_norm": 8.937205810721213, "learning_rate": 5.989775357386544e-06, "loss": 18.1315, "step": 24809 }, { "epoch": 0.45350686383826566, "grad_norm": 5.685135286358327, "learning_rate": 5.989485201858549e-06, "loss": 17.2512, "step": 24810 }, { "epoch": 0.4535251430347122, "grad_norm": 6.0497262468242425, "learning_rate": 5.989195042862495e-06, "loss": 17.3527, "step": 24811 }, { "epoch": 0.45354342223115873, "grad_norm": 7.0295077846462934, "learning_rate": 5.988904880399398e-06, "loss": 17.764, "step": 24812 }, { "epoch": 0.45356170142760527, "grad_norm": 6.518605241306475, "learning_rate": 5.988614714470276e-06, "loss": 17.5902, "step": 24813 }, { "epoch": 0.45357998062405175, "grad_norm": 5.870952484450353, "learning_rate": 5.988324545076144e-06, "loss": 17.538, "step": 24814 }, { "epoch": 0.4535982598204983, "grad_norm": 5.238022253591621, "learning_rate": 5.988034372218021e-06, "loss": 17.2232, "step": 24815 }, { "epoch": 0.4536165390169448, "grad_norm": 6.168754075001521, "learning_rate": 5.987744195896923e-06, "loss": 17.3962, "step": 24816 }, { "epoch": 0.45363481821339136, "grad_norm": 6.673533910286118, "learning_rate": 5.987454016113867e-06, "loss": 17.8792, "step": 24817 }, { "epoch": 0.4536530974098379, "grad_norm": 7.751066565956767, "learning_rate": 5.9871638328698725e-06, "loss": 18.1194, "step": 24818 }, { "epoch": 0.45367137660628437, "grad_norm": 6.507359471953103, "learning_rate": 5.986873646165951e-06, "loss": 17.6494, "step": 24819 }, { "epoch": 0.4536896558027309, "grad_norm": 8.920332758130165, "learning_rate": 5.986583456003124e-06, "loss": 18.2551, "step": 24820 }, { "epoch": 0.45370793499917744, "grad_norm": 7.692489007374717, "learning_rate": 5.98629326238241e-06, "loss": 18.1621, "step": 24821 }, { "epoch": 0.453726214195624, "grad_norm": 6.3461456362686635, "learning_rate": 5.986003065304822e-06, "loss": 17.6561, "step": 24822 }, { "epoch": 0.45374449339207046, "grad_norm": 7.554931960332167, "learning_rate": 5.985712864771378e-06, "loss": 17.7931, "step": 24823 }, { "epoch": 0.453762772588517, "grad_norm": 5.62834503111923, "learning_rate": 5.9854226607830955e-06, "loss": 17.187, "step": 24824 }, { "epoch": 0.45378105178496353, "grad_norm": 7.30912711086053, "learning_rate": 5.985132453340995e-06, "loss": 17.793, "step": 24825 }, { "epoch": 0.45379933098141007, "grad_norm": 6.51726859739539, "learning_rate": 5.9848422424460895e-06, "loss": 17.8442, "step": 24826 }, { "epoch": 0.4538176101778566, "grad_norm": 7.22422985826368, "learning_rate": 5.984552028099396e-06, "loss": 17.9982, "step": 24827 }, { "epoch": 0.4538358893743031, "grad_norm": 7.219433123503709, "learning_rate": 5.984261810301935e-06, "loss": 17.3976, "step": 24828 }, { "epoch": 0.4538541685707496, "grad_norm": 6.654532646470851, "learning_rate": 5.98397158905472e-06, "loss": 17.8668, "step": 24829 }, { "epoch": 0.45387244776719615, "grad_norm": 6.292491359443311, "learning_rate": 5.983681364358771e-06, "loss": 17.4518, "step": 24830 }, { "epoch": 0.4538907269636427, "grad_norm": 7.276505528291034, "learning_rate": 5.983391136215104e-06, "loss": 17.998, "step": 24831 }, { "epoch": 0.4539090061600892, "grad_norm": 7.740058123791623, "learning_rate": 5.983100904624737e-06, "loss": 18.0715, "step": 24832 }, { "epoch": 0.4539272853565357, "grad_norm": 6.70546100657501, "learning_rate": 5.982810669588685e-06, "loss": 17.5629, "step": 24833 }, { "epoch": 0.45394556455298224, "grad_norm": 6.932532836554571, "learning_rate": 5.982520431107968e-06, "loss": 17.6811, "step": 24834 }, { "epoch": 0.4539638437494288, "grad_norm": 5.7676651583805025, "learning_rate": 5.982230189183602e-06, "loss": 17.3768, "step": 24835 }, { "epoch": 0.4539821229458753, "grad_norm": 8.112885332345646, "learning_rate": 5.981939943816605e-06, "loss": 17.9777, "step": 24836 }, { "epoch": 0.45400040214232185, "grad_norm": 5.899663538561533, "learning_rate": 5.981649695007993e-06, "loss": 17.3216, "step": 24837 }, { "epoch": 0.4540186813387683, "grad_norm": 7.48592461627228, "learning_rate": 5.981359442758783e-06, "loss": 17.8734, "step": 24838 }, { "epoch": 0.45403696053521486, "grad_norm": 7.987240402143579, "learning_rate": 5.981069187069996e-06, "loss": 18.1016, "step": 24839 }, { "epoch": 0.4540552397316614, "grad_norm": 5.7988877873438724, "learning_rate": 5.980778927942644e-06, "loss": 17.1847, "step": 24840 }, { "epoch": 0.45407351892810793, "grad_norm": 7.3804228309967135, "learning_rate": 5.980488665377748e-06, "loss": 17.8344, "step": 24841 }, { "epoch": 0.45409179812455447, "grad_norm": 5.739488152069684, "learning_rate": 5.980198399376325e-06, "loss": 17.1668, "step": 24842 }, { "epoch": 0.45411007732100095, "grad_norm": 6.449068281897217, "learning_rate": 5.979908129939391e-06, "loss": 17.4461, "step": 24843 }, { "epoch": 0.4541283565174475, "grad_norm": 6.511989432260754, "learning_rate": 5.979617857067964e-06, "loss": 17.6251, "step": 24844 }, { "epoch": 0.454146635713894, "grad_norm": 7.184755907382845, "learning_rate": 5.979327580763062e-06, "loss": 17.7948, "step": 24845 }, { "epoch": 0.45416491491034056, "grad_norm": 7.397502154523116, "learning_rate": 5.979037301025701e-06, "loss": 17.9996, "step": 24846 }, { "epoch": 0.4541831941067871, "grad_norm": 7.44664159389226, "learning_rate": 5.978747017856898e-06, "loss": 17.6774, "step": 24847 }, { "epoch": 0.45420147330323357, "grad_norm": 6.625820805484883, "learning_rate": 5.978456731257674e-06, "loss": 17.3918, "step": 24848 }, { "epoch": 0.4542197524996801, "grad_norm": 7.660520868003206, "learning_rate": 5.978166441229044e-06, "loss": 17.7056, "step": 24849 }, { "epoch": 0.45423803169612664, "grad_norm": 6.622079502102471, "learning_rate": 5.977876147772025e-06, "loss": 17.1085, "step": 24850 }, { "epoch": 0.4542563108925732, "grad_norm": 7.282386391227921, "learning_rate": 5.977585850887634e-06, "loss": 18.1855, "step": 24851 }, { "epoch": 0.4542745900890197, "grad_norm": 5.043317870253002, "learning_rate": 5.97729555057689e-06, "loss": 16.9436, "step": 24852 }, { "epoch": 0.4542928692854662, "grad_norm": 6.813848243736479, "learning_rate": 5.97700524684081e-06, "loss": 17.655, "step": 24853 }, { "epoch": 0.45431114848191273, "grad_norm": 5.891577703227318, "learning_rate": 5.976714939680412e-06, "loss": 17.1782, "step": 24854 }, { "epoch": 0.45432942767835927, "grad_norm": 7.105356369781019, "learning_rate": 5.976424629096712e-06, "loss": 17.4883, "step": 24855 }, { "epoch": 0.4543477068748058, "grad_norm": 7.454305661770091, "learning_rate": 5.976134315090729e-06, "loss": 17.7958, "step": 24856 }, { "epoch": 0.4543659860712523, "grad_norm": 7.516791832633677, "learning_rate": 5.97584399766348e-06, "loss": 18.2962, "step": 24857 }, { "epoch": 0.4543842652676988, "grad_norm": 7.0906485532158605, "learning_rate": 5.975553676815982e-06, "loss": 17.6407, "step": 24858 }, { "epoch": 0.45440254446414535, "grad_norm": 7.128028939234638, "learning_rate": 5.975263352549253e-06, "loss": 17.8622, "step": 24859 }, { "epoch": 0.4544208236605919, "grad_norm": 6.907942211710578, "learning_rate": 5.97497302486431e-06, "loss": 17.9482, "step": 24860 }, { "epoch": 0.4544391028570384, "grad_norm": 6.368527253496372, "learning_rate": 5.974682693762172e-06, "loss": 17.4452, "step": 24861 }, { "epoch": 0.4544573820534849, "grad_norm": 8.097922008715956, "learning_rate": 5.9743923592438555e-06, "loss": 17.9609, "step": 24862 }, { "epoch": 0.45447566124993144, "grad_norm": 6.807453022908108, "learning_rate": 5.97410202131038e-06, "loss": 17.8506, "step": 24863 }, { "epoch": 0.454493940446378, "grad_norm": 6.664126366238458, "learning_rate": 5.973811679962759e-06, "loss": 17.4324, "step": 24864 }, { "epoch": 0.4545122196428245, "grad_norm": 4.72076247400394, "learning_rate": 5.973521335202013e-06, "loss": 16.7962, "step": 24865 }, { "epoch": 0.45453049883927105, "grad_norm": 5.934770211234447, "learning_rate": 5.97323098702916e-06, "loss": 17.1794, "step": 24866 }, { "epoch": 0.4545487780357175, "grad_norm": 7.472014628053679, "learning_rate": 5.972940635445217e-06, "loss": 17.7042, "step": 24867 }, { "epoch": 0.45456705723216406, "grad_norm": 5.461609249343249, "learning_rate": 5.9726502804512e-06, "loss": 17.0504, "step": 24868 }, { "epoch": 0.4545853364286106, "grad_norm": 6.322879836658449, "learning_rate": 5.97235992204813e-06, "loss": 17.5128, "step": 24869 }, { "epoch": 0.45460361562505713, "grad_norm": 5.059208558620764, "learning_rate": 5.9720695602370215e-06, "loss": 16.9362, "step": 24870 }, { "epoch": 0.45462189482150367, "grad_norm": 7.565480030308469, "learning_rate": 5.971779195018894e-06, "loss": 18.1777, "step": 24871 }, { "epoch": 0.45464017401795015, "grad_norm": 7.677649071696698, "learning_rate": 5.971488826394764e-06, "loss": 17.9081, "step": 24872 }, { "epoch": 0.4546584532143967, "grad_norm": 6.457744832161168, "learning_rate": 5.971198454365652e-06, "loss": 17.5226, "step": 24873 }, { "epoch": 0.4546767324108432, "grad_norm": 8.486369762666998, "learning_rate": 5.970908078932571e-06, "loss": 18.1025, "step": 24874 }, { "epoch": 0.45469501160728976, "grad_norm": 7.373906463484746, "learning_rate": 5.9706177000965434e-06, "loss": 17.8832, "step": 24875 }, { "epoch": 0.4547132908037363, "grad_norm": 7.7853096676110285, "learning_rate": 5.970327317858584e-06, "loss": 18.1932, "step": 24876 }, { "epoch": 0.45473157000018277, "grad_norm": 7.584841019866278, "learning_rate": 5.970036932219714e-06, "loss": 18.2716, "step": 24877 }, { "epoch": 0.4547498491966293, "grad_norm": 6.319330515581081, "learning_rate": 5.9697465431809455e-06, "loss": 17.874, "step": 24878 }, { "epoch": 0.45476812839307584, "grad_norm": 6.481658860565889, "learning_rate": 5.9694561507433e-06, "loss": 17.3555, "step": 24879 }, { "epoch": 0.4547864075895224, "grad_norm": 6.38656394726239, "learning_rate": 5.969165754907796e-06, "loss": 17.7083, "step": 24880 }, { "epoch": 0.4548046867859689, "grad_norm": 5.605613709880605, "learning_rate": 5.96887535567545e-06, "loss": 17.2987, "step": 24881 }, { "epoch": 0.4548229659824154, "grad_norm": 6.6044745124806585, "learning_rate": 5.9685849530472795e-06, "loss": 17.4623, "step": 24882 }, { "epoch": 0.45484124517886193, "grad_norm": 5.850295887623415, "learning_rate": 5.968294547024303e-06, "loss": 17.1722, "step": 24883 }, { "epoch": 0.45485952437530847, "grad_norm": 6.675313290934436, "learning_rate": 5.968004137607538e-06, "loss": 17.9645, "step": 24884 }, { "epoch": 0.454877803571755, "grad_norm": 7.694681496036081, "learning_rate": 5.967713724798003e-06, "loss": 18.1642, "step": 24885 }, { "epoch": 0.45489608276820154, "grad_norm": 7.106606312621698, "learning_rate": 5.9674233085967145e-06, "loss": 17.8321, "step": 24886 }, { "epoch": 0.454914361964648, "grad_norm": 7.192350868472989, "learning_rate": 5.967132889004692e-06, "loss": 17.7427, "step": 24887 }, { "epoch": 0.45493264116109455, "grad_norm": 7.930449156621662, "learning_rate": 5.966842466022952e-06, "loss": 18.1769, "step": 24888 }, { "epoch": 0.4549509203575411, "grad_norm": 5.3265330405979405, "learning_rate": 5.9665520396525135e-06, "loss": 16.9592, "step": 24889 }, { "epoch": 0.4549691995539876, "grad_norm": 7.141694489175982, "learning_rate": 5.966261609894395e-06, "loss": 17.9663, "step": 24890 }, { "epoch": 0.4549874787504341, "grad_norm": 7.5992211434718655, "learning_rate": 5.965971176749612e-06, "loss": 18.0856, "step": 24891 }, { "epoch": 0.45500575794688064, "grad_norm": 6.695374744834487, "learning_rate": 5.965680740219183e-06, "loss": 17.4817, "step": 24892 }, { "epoch": 0.4550240371433272, "grad_norm": 6.740658620386362, "learning_rate": 5.965390300304128e-06, "loss": 17.5219, "step": 24893 }, { "epoch": 0.4550423163397737, "grad_norm": 6.3009442508213445, "learning_rate": 5.965099857005464e-06, "loss": 17.2256, "step": 24894 }, { "epoch": 0.45506059553622025, "grad_norm": 5.805336415765952, "learning_rate": 5.9648094103242096e-06, "loss": 17.1404, "step": 24895 }, { "epoch": 0.4550788747326667, "grad_norm": 6.654660750961497, "learning_rate": 5.96451896026138e-06, "loss": 18.0301, "step": 24896 }, { "epoch": 0.45509715392911326, "grad_norm": 6.159094027491954, "learning_rate": 5.964228506817996e-06, "loss": 17.8566, "step": 24897 }, { "epoch": 0.4551154331255598, "grad_norm": 5.617800758583667, "learning_rate": 5.963938049995075e-06, "loss": 17.1717, "step": 24898 }, { "epoch": 0.45513371232200633, "grad_norm": 5.684078828624881, "learning_rate": 5.963647589793634e-06, "loss": 17.1505, "step": 24899 }, { "epoch": 0.45515199151845287, "grad_norm": 6.164305056396494, "learning_rate": 5.963357126214692e-06, "loss": 17.1597, "step": 24900 }, { "epoch": 0.45517027071489935, "grad_norm": 5.661822102120397, "learning_rate": 5.963066659259267e-06, "loss": 17.461, "step": 24901 }, { "epoch": 0.4551885499113459, "grad_norm": 6.6575164760084835, "learning_rate": 5.962776188928377e-06, "loss": 17.5022, "step": 24902 }, { "epoch": 0.4552068291077924, "grad_norm": 5.634278910422919, "learning_rate": 5.962485715223041e-06, "loss": 17.1694, "step": 24903 }, { "epoch": 0.45522510830423896, "grad_norm": 6.8679635270014705, "learning_rate": 5.962195238144275e-06, "loss": 17.8086, "step": 24904 }, { "epoch": 0.4552433875006855, "grad_norm": 6.154759540486805, "learning_rate": 5.961904757693099e-06, "loss": 17.3594, "step": 24905 }, { "epoch": 0.455261666697132, "grad_norm": 7.605995253946768, "learning_rate": 5.961614273870528e-06, "loss": 17.8852, "step": 24906 }, { "epoch": 0.4552799458935785, "grad_norm": 6.513969623764981, "learning_rate": 5.9613237866775845e-06, "loss": 17.4292, "step": 24907 }, { "epoch": 0.45529822509002504, "grad_norm": 5.226876028951783, "learning_rate": 5.961033296115285e-06, "loss": 17.0332, "step": 24908 }, { "epoch": 0.4553165042864716, "grad_norm": 6.233897614045115, "learning_rate": 5.960742802184646e-06, "loss": 17.5373, "step": 24909 }, { "epoch": 0.4553347834829181, "grad_norm": 6.6655302509229, "learning_rate": 5.9604523048866865e-06, "loss": 17.2439, "step": 24910 }, { "epoch": 0.4553530626793646, "grad_norm": 6.8305913291420985, "learning_rate": 5.960161804222427e-06, "loss": 17.7038, "step": 24911 }, { "epoch": 0.45537134187581113, "grad_norm": 5.805860333489474, "learning_rate": 5.959871300192882e-06, "loss": 17.3428, "step": 24912 }, { "epoch": 0.45538962107225767, "grad_norm": 6.114688002868169, "learning_rate": 5.959580792799071e-06, "loss": 17.515, "step": 24913 }, { "epoch": 0.4554079002687042, "grad_norm": 6.3102515482798776, "learning_rate": 5.959290282042014e-06, "loss": 17.3935, "step": 24914 }, { "epoch": 0.45542617946515074, "grad_norm": 6.762663799426242, "learning_rate": 5.958999767922726e-06, "loss": 17.5704, "step": 24915 }, { "epoch": 0.4554444586615972, "grad_norm": 6.7378267892211126, "learning_rate": 5.95870925044223e-06, "loss": 17.7129, "step": 24916 }, { "epoch": 0.45546273785804375, "grad_norm": 6.833504878076066, "learning_rate": 5.958418729601538e-06, "loss": 17.692, "step": 24917 }, { "epoch": 0.4554810170544903, "grad_norm": 5.306666325362931, "learning_rate": 5.958128205401674e-06, "loss": 17.1075, "step": 24918 }, { "epoch": 0.4554992962509368, "grad_norm": 5.969029578562306, "learning_rate": 5.957837677843652e-06, "loss": 17.4495, "step": 24919 }, { "epoch": 0.45551757544738336, "grad_norm": 5.976336859409207, "learning_rate": 5.957547146928493e-06, "loss": 17.3549, "step": 24920 }, { "epoch": 0.45553585464382984, "grad_norm": 5.806214475317869, "learning_rate": 5.957256612657215e-06, "loss": 17.2555, "step": 24921 }, { "epoch": 0.4555541338402764, "grad_norm": 8.016121191859519, "learning_rate": 5.956966075030834e-06, "loss": 18.102, "step": 24922 }, { "epoch": 0.4555724130367229, "grad_norm": 8.230586744775685, "learning_rate": 5.956675534050371e-06, "loss": 18.5998, "step": 24923 }, { "epoch": 0.45559069223316945, "grad_norm": 6.598971293214389, "learning_rate": 5.956384989716842e-06, "loss": 17.5706, "step": 24924 }, { "epoch": 0.4556089714296159, "grad_norm": 6.186971604231284, "learning_rate": 5.956094442031269e-06, "loss": 17.494, "step": 24925 }, { "epoch": 0.45562725062606246, "grad_norm": 7.136106957598433, "learning_rate": 5.955803890994667e-06, "loss": 17.9298, "step": 24926 }, { "epoch": 0.455645529822509, "grad_norm": 5.9943177520249, "learning_rate": 5.9555133366080545e-06, "loss": 17.3705, "step": 24927 }, { "epoch": 0.45566380901895553, "grad_norm": 7.222490292310578, "learning_rate": 5.95522277887245e-06, "loss": 17.8616, "step": 24928 }, { "epoch": 0.45568208821540207, "grad_norm": 7.608678769348252, "learning_rate": 5.954932217788875e-06, "loss": 17.9525, "step": 24929 }, { "epoch": 0.45570036741184855, "grad_norm": 7.804162846358708, "learning_rate": 5.954641653358343e-06, "loss": 17.8717, "step": 24930 }, { "epoch": 0.4557186466082951, "grad_norm": 6.6028374641371625, "learning_rate": 5.954351085581876e-06, "loss": 17.573, "step": 24931 }, { "epoch": 0.4557369258047416, "grad_norm": 5.98152414111835, "learning_rate": 5.954060514460492e-06, "loss": 17.2916, "step": 24932 }, { "epoch": 0.45575520500118816, "grad_norm": 6.051014682198928, "learning_rate": 5.953769939995206e-06, "loss": 17.3404, "step": 24933 }, { "epoch": 0.4557734841976347, "grad_norm": 6.7063809868234925, "learning_rate": 5.953479362187041e-06, "loss": 17.6344, "step": 24934 }, { "epoch": 0.4557917633940812, "grad_norm": 6.408814761826627, "learning_rate": 5.953188781037015e-06, "loss": 17.1016, "step": 24935 }, { "epoch": 0.4558100425905277, "grad_norm": 7.052933799014533, "learning_rate": 5.952898196546144e-06, "loss": 17.4834, "step": 24936 }, { "epoch": 0.45582832178697424, "grad_norm": 7.127083901926689, "learning_rate": 5.952607608715447e-06, "loss": 17.5944, "step": 24937 }, { "epoch": 0.4558466009834208, "grad_norm": 6.907721348468742, "learning_rate": 5.952317017545941e-06, "loss": 17.5721, "step": 24938 }, { "epoch": 0.4558648801798673, "grad_norm": 4.945476134036083, "learning_rate": 5.952026423038651e-06, "loss": 16.7368, "step": 24939 }, { "epoch": 0.4558831593763138, "grad_norm": 7.127582004948006, "learning_rate": 5.951735825194588e-06, "loss": 18.0291, "step": 24940 }, { "epoch": 0.45590143857276033, "grad_norm": 7.289376816504818, "learning_rate": 5.951445224014773e-06, "loss": 17.4337, "step": 24941 }, { "epoch": 0.45591971776920687, "grad_norm": 7.195187834258994, "learning_rate": 5.951154619500227e-06, "loss": 17.8671, "step": 24942 }, { "epoch": 0.4559379969656534, "grad_norm": 6.080345119539842, "learning_rate": 5.9508640116519656e-06, "loss": 17.2807, "step": 24943 }, { "epoch": 0.45595627616209994, "grad_norm": 7.27655914283795, "learning_rate": 5.950573400471008e-06, "loss": 17.7143, "step": 24944 }, { "epoch": 0.4559745553585464, "grad_norm": 5.197007266471913, "learning_rate": 5.950282785958373e-06, "loss": 16.8949, "step": 24945 }, { "epoch": 0.45599283455499295, "grad_norm": 5.502133312801913, "learning_rate": 5.949992168115081e-06, "loss": 17.0616, "step": 24946 }, { "epoch": 0.4560111137514395, "grad_norm": 7.636449685803365, "learning_rate": 5.949701546942147e-06, "loss": 17.619, "step": 24947 }, { "epoch": 0.456029392947886, "grad_norm": 6.895739600713596, "learning_rate": 5.949410922440592e-06, "loss": 17.6776, "step": 24948 }, { "epoch": 0.45604767214433256, "grad_norm": 7.892914071186963, "learning_rate": 5.9491202946114355e-06, "loss": 17.7504, "step": 24949 }, { "epoch": 0.45606595134077904, "grad_norm": 5.7805011601384315, "learning_rate": 5.948829663455694e-06, "loss": 17.2404, "step": 24950 }, { "epoch": 0.4560842305372256, "grad_norm": 6.332919356755202, "learning_rate": 5.948539028974385e-06, "loss": 17.2974, "step": 24951 }, { "epoch": 0.4561025097336721, "grad_norm": 9.809507334768115, "learning_rate": 5.9482483911685316e-06, "loss": 18.2558, "step": 24952 }, { "epoch": 0.45612078893011865, "grad_norm": 6.073160875066537, "learning_rate": 5.947957750039148e-06, "loss": 17.3897, "step": 24953 }, { "epoch": 0.4561390681265652, "grad_norm": 6.531167093761964, "learning_rate": 5.947667105587256e-06, "loss": 17.439, "step": 24954 }, { "epoch": 0.45615734732301166, "grad_norm": 7.527818403529953, "learning_rate": 5.947376457813873e-06, "loss": 17.7602, "step": 24955 }, { "epoch": 0.4561756265194582, "grad_norm": 8.235916091449743, "learning_rate": 5.947085806720017e-06, "loss": 17.6098, "step": 24956 }, { "epoch": 0.45619390571590474, "grad_norm": 7.187161374228202, "learning_rate": 5.946795152306708e-06, "loss": 17.783, "step": 24957 }, { "epoch": 0.45621218491235127, "grad_norm": 6.41587946281288, "learning_rate": 5.946504494574963e-06, "loss": 17.2936, "step": 24958 }, { "epoch": 0.45623046410879775, "grad_norm": 5.618748298564456, "learning_rate": 5.946213833525805e-06, "loss": 17.2479, "step": 24959 }, { "epoch": 0.4562487433052443, "grad_norm": 5.766928935455178, "learning_rate": 5.945923169160245e-06, "loss": 17.3264, "step": 24960 }, { "epoch": 0.4562670225016908, "grad_norm": 7.84812429726118, "learning_rate": 5.945632501479309e-06, "loss": 17.9254, "step": 24961 }, { "epoch": 0.45628530169813736, "grad_norm": 7.181081642174104, "learning_rate": 5.945341830484012e-06, "loss": 17.5853, "step": 24962 }, { "epoch": 0.4563035808945839, "grad_norm": 6.726035651839546, "learning_rate": 5.945051156175377e-06, "loss": 17.7259, "step": 24963 }, { "epoch": 0.4563218600910304, "grad_norm": 5.917198199955994, "learning_rate": 5.944760478554416e-06, "loss": 17.3732, "step": 24964 }, { "epoch": 0.4563401392874769, "grad_norm": 6.127464928487141, "learning_rate": 5.9444697976221525e-06, "loss": 17.0653, "step": 24965 }, { "epoch": 0.45635841848392344, "grad_norm": 8.204374634512021, "learning_rate": 5.944179113379606e-06, "loss": 18.1827, "step": 24966 }, { "epoch": 0.45637669768037, "grad_norm": 6.621224850233595, "learning_rate": 5.943888425827793e-06, "loss": 17.4072, "step": 24967 }, { "epoch": 0.4563949768768165, "grad_norm": 6.47726189934075, "learning_rate": 5.943597734967732e-06, "loss": 17.6277, "step": 24968 }, { "epoch": 0.456413256073263, "grad_norm": 6.388248961482317, "learning_rate": 5.943307040800443e-06, "loss": 17.3201, "step": 24969 }, { "epoch": 0.45643153526970953, "grad_norm": 6.423716528385374, "learning_rate": 5.943016343326945e-06, "loss": 17.679, "step": 24970 }, { "epoch": 0.45644981446615607, "grad_norm": 5.6762539725291195, "learning_rate": 5.942725642548256e-06, "loss": 17.3484, "step": 24971 }, { "epoch": 0.4564680936626026, "grad_norm": 6.9716001442144675, "learning_rate": 5.942434938465396e-06, "loss": 17.838, "step": 24972 }, { "epoch": 0.45648637285904914, "grad_norm": 5.593409809155962, "learning_rate": 5.942144231079383e-06, "loss": 17.1198, "step": 24973 }, { "epoch": 0.4565046520554956, "grad_norm": 7.224772400014191, "learning_rate": 5.941853520391237e-06, "loss": 17.8676, "step": 24974 }, { "epoch": 0.45652293125194215, "grad_norm": 7.4132826230866735, "learning_rate": 5.941562806401975e-06, "loss": 18.2126, "step": 24975 }, { "epoch": 0.4565412104483887, "grad_norm": 5.8980284137638455, "learning_rate": 5.941272089112617e-06, "loss": 17.1877, "step": 24976 }, { "epoch": 0.4565594896448352, "grad_norm": 6.153005448015386, "learning_rate": 5.940981368524184e-06, "loss": 17.4887, "step": 24977 }, { "epoch": 0.45657776884128176, "grad_norm": 5.92254038898274, "learning_rate": 5.940690644637691e-06, "loss": 17.3755, "step": 24978 }, { "epoch": 0.45659604803772824, "grad_norm": 6.557337023734522, "learning_rate": 5.940399917454159e-06, "loss": 17.6272, "step": 24979 }, { "epoch": 0.4566143272341748, "grad_norm": 5.132409964638729, "learning_rate": 5.940109186974609e-06, "loss": 17.1082, "step": 24980 }, { "epoch": 0.4566326064306213, "grad_norm": 4.745751923636569, "learning_rate": 5.939818453200056e-06, "loss": 16.8106, "step": 24981 }, { "epoch": 0.45665088562706785, "grad_norm": 6.50431922714319, "learning_rate": 5.939527716131521e-06, "loss": 17.2606, "step": 24982 }, { "epoch": 0.4566691648235144, "grad_norm": 7.071391533159738, "learning_rate": 5.939236975770022e-06, "loss": 17.753, "step": 24983 }, { "epoch": 0.45668744401996086, "grad_norm": 6.161093548966815, "learning_rate": 5.938946232116581e-06, "loss": 17.1366, "step": 24984 }, { "epoch": 0.4567057232164074, "grad_norm": 5.539176743544879, "learning_rate": 5.9386554851722134e-06, "loss": 17.2145, "step": 24985 }, { "epoch": 0.45672400241285394, "grad_norm": 5.763589022567573, "learning_rate": 5.938364734937941e-06, "loss": 17.2451, "step": 24986 }, { "epoch": 0.45674228160930047, "grad_norm": 6.188618538411882, "learning_rate": 5.9380739814147805e-06, "loss": 17.569, "step": 24987 }, { "epoch": 0.456760560805747, "grad_norm": 7.275887982283109, "learning_rate": 5.937783224603753e-06, "loss": 18.0389, "step": 24988 }, { "epoch": 0.4567788400021935, "grad_norm": 5.9095840773301, "learning_rate": 5.937492464505875e-06, "loss": 17.1005, "step": 24989 }, { "epoch": 0.45679711919864, "grad_norm": 6.170282123101501, "learning_rate": 5.937201701122171e-06, "loss": 17.5119, "step": 24990 }, { "epoch": 0.45681539839508656, "grad_norm": 6.781463448853069, "learning_rate": 5.936910934453652e-06, "loss": 17.6081, "step": 24991 }, { "epoch": 0.4568336775915331, "grad_norm": 6.42650410498322, "learning_rate": 5.9366201645013435e-06, "loss": 17.6133, "step": 24992 }, { "epoch": 0.4568519567879796, "grad_norm": 6.944239694729154, "learning_rate": 5.936329391266261e-06, "loss": 17.68, "step": 24993 }, { "epoch": 0.4568702359844261, "grad_norm": 9.275047584937695, "learning_rate": 5.936038614749429e-06, "loss": 18.3214, "step": 24994 }, { "epoch": 0.45688851518087265, "grad_norm": 6.422879899323243, "learning_rate": 5.9357478349518595e-06, "loss": 17.5967, "step": 24995 }, { "epoch": 0.4569067943773192, "grad_norm": 7.40083545125522, "learning_rate": 5.935457051874575e-06, "loss": 18.1773, "step": 24996 }, { "epoch": 0.4569250735737657, "grad_norm": 7.582751640422174, "learning_rate": 5.935166265518597e-06, "loss": 17.8034, "step": 24997 }, { "epoch": 0.4569433527702122, "grad_norm": 5.281919500133999, "learning_rate": 5.934875475884942e-06, "loss": 17.1753, "step": 24998 }, { "epoch": 0.45696163196665873, "grad_norm": 5.054218994977413, "learning_rate": 5.9345846829746275e-06, "loss": 16.9593, "step": 24999 }, { "epoch": 0.45697991116310527, "grad_norm": 5.185175175409264, "learning_rate": 5.934293886788676e-06, "loss": 16.984, "step": 25000 }, { "epoch": 0.4569981903595518, "grad_norm": 6.5781400783196835, "learning_rate": 5.934003087328105e-06, "loss": 17.9447, "step": 25001 }, { "epoch": 0.45701646955599834, "grad_norm": 5.665450611948805, "learning_rate": 5.933712284593936e-06, "loss": 17.1155, "step": 25002 }, { "epoch": 0.4570347487524448, "grad_norm": 6.668786084884377, "learning_rate": 5.933421478587184e-06, "loss": 17.3732, "step": 25003 }, { "epoch": 0.45705302794889135, "grad_norm": 5.047739528366748, "learning_rate": 5.933130669308873e-06, "loss": 16.9366, "step": 25004 }, { "epoch": 0.4570713071453379, "grad_norm": 5.7553338101981835, "learning_rate": 5.932839856760018e-06, "loss": 17.3289, "step": 25005 }, { "epoch": 0.4570895863417844, "grad_norm": 5.205437676452599, "learning_rate": 5.932549040941641e-06, "loss": 16.904, "step": 25006 }, { "epoch": 0.45710786553823096, "grad_norm": 6.927558186739496, "learning_rate": 5.932258221854761e-06, "loss": 17.8677, "step": 25007 }, { "epoch": 0.45712614473467744, "grad_norm": 6.593786348312188, "learning_rate": 5.931967399500397e-06, "loss": 17.5772, "step": 25008 }, { "epoch": 0.457144423931124, "grad_norm": 8.053694123470054, "learning_rate": 5.931676573879568e-06, "loss": 17.9786, "step": 25009 }, { "epoch": 0.4571627031275705, "grad_norm": 5.583726033411606, "learning_rate": 5.931385744993292e-06, "loss": 17.0944, "step": 25010 }, { "epoch": 0.45718098232401705, "grad_norm": 5.1863500794669015, "learning_rate": 5.931094912842592e-06, "loss": 16.9153, "step": 25011 }, { "epoch": 0.4571992615204636, "grad_norm": 6.834577803973572, "learning_rate": 5.930804077428484e-06, "loss": 17.7712, "step": 25012 }, { "epoch": 0.45721754071691006, "grad_norm": 5.897365251803732, "learning_rate": 5.930513238751988e-06, "loss": 17.4395, "step": 25013 }, { "epoch": 0.4572358199133566, "grad_norm": 5.8493365305636535, "learning_rate": 5.930222396814125e-06, "loss": 17.4145, "step": 25014 }, { "epoch": 0.45725409910980314, "grad_norm": 5.287957136092569, "learning_rate": 5.929931551615912e-06, "loss": 17.0397, "step": 25015 }, { "epoch": 0.45727237830624967, "grad_norm": 7.513279019333759, "learning_rate": 5.9296407031583705e-06, "loss": 18.2377, "step": 25016 }, { "epoch": 0.4572906575026962, "grad_norm": 7.1404263870812175, "learning_rate": 5.929349851442519e-06, "loss": 17.5738, "step": 25017 }, { "epoch": 0.4573089366991427, "grad_norm": 5.902897723117464, "learning_rate": 5.929058996469377e-06, "loss": 17.2764, "step": 25018 }, { "epoch": 0.4573272158955892, "grad_norm": 6.950774681367328, "learning_rate": 5.928768138239962e-06, "loss": 17.6763, "step": 25019 }, { "epoch": 0.45734549509203576, "grad_norm": 5.235636441851553, "learning_rate": 5.928477276755297e-06, "loss": 16.9038, "step": 25020 }, { "epoch": 0.4573637742884823, "grad_norm": 5.592681693440445, "learning_rate": 5.9281864120164e-06, "loss": 17.149, "step": 25021 }, { "epoch": 0.45738205348492883, "grad_norm": 6.098654773706037, "learning_rate": 5.927895544024289e-06, "loss": 17.6138, "step": 25022 }, { "epoch": 0.4574003326813753, "grad_norm": 5.711515865679313, "learning_rate": 5.927604672779985e-06, "loss": 17.1243, "step": 25023 }, { "epoch": 0.45741861187782185, "grad_norm": 7.063529120006668, "learning_rate": 5.927313798284507e-06, "loss": 17.8486, "step": 25024 }, { "epoch": 0.4574368910742684, "grad_norm": 7.12987208995607, "learning_rate": 5.927022920538876e-06, "loss": 18.1592, "step": 25025 }, { "epoch": 0.4574551702707149, "grad_norm": 6.505539588273021, "learning_rate": 5.926732039544109e-06, "loss": 17.3972, "step": 25026 }, { "epoch": 0.4574734494671614, "grad_norm": 6.7104988310585645, "learning_rate": 5.926441155301226e-06, "loss": 17.385, "step": 25027 }, { "epoch": 0.45749172866360793, "grad_norm": 6.991219655943665, "learning_rate": 5.926150267811248e-06, "loss": 17.8121, "step": 25028 }, { "epoch": 0.45751000786005447, "grad_norm": 5.934969451134591, "learning_rate": 5.9258593770751935e-06, "loss": 17.486, "step": 25029 }, { "epoch": 0.457528287056501, "grad_norm": 9.175652058688627, "learning_rate": 5.925568483094081e-06, "loss": 17.9355, "step": 25030 }, { "epoch": 0.45754656625294754, "grad_norm": 6.92586047309848, "learning_rate": 5.925277585868934e-06, "loss": 17.6992, "step": 25031 }, { "epoch": 0.457564845449394, "grad_norm": 7.093206602569276, "learning_rate": 5.9249866854007685e-06, "loss": 17.9072, "step": 25032 }, { "epoch": 0.45758312464584056, "grad_norm": 5.559411962234882, "learning_rate": 5.924695781690604e-06, "loss": 17.046, "step": 25033 }, { "epoch": 0.4576014038422871, "grad_norm": 7.940295854282515, "learning_rate": 5.92440487473946e-06, "loss": 17.6611, "step": 25034 }, { "epoch": 0.4576196830387336, "grad_norm": 9.122628018645774, "learning_rate": 5.924113964548361e-06, "loss": 17.8055, "step": 25035 }, { "epoch": 0.45763796223518016, "grad_norm": 6.720342112232046, "learning_rate": 5.923823051118319e-06, "loss": 17.6522, "step": 25036 }, { "epoch": 0.45765624143162664, "grad_norm": 6.209130475710021, "learning_rate": 5.923532134450358e-06, "loss": 17.2298, "step": 25037 }, { "epoch": 0.4576745206280732, "grad_norm": 4.943226370519743, "learning_rate": 5.923241214545496e-06, "loss": 16.8489, "step": 25038 }, { "epoch": 0.4576927998245197, "grad_norm": 6.909209484041424, "learning_rate": 5.9229502914047565e-06, "loss": 17.4053, "step": 25039 }, { "epoch": 0.45771107902096625, "grad_norm": 5.227227420071569, "learning_rate": 5.922659365029156e-06, "loss": 17.0036, "step": 25040 }, { "epoch": 0.4577293582174128, "grad_norm": 6.522922057325364, "learning_rate": 5.922368435419713e-06, "loss": 17.774, "step": 25041 }, { "epoch": 0.45774763741385927, "grad_norm": 6.219062308287128, "learning_rate": 5.922077502577449e-06, "loss": 17.3931, "step": 25042 }, { "epoch": 0.4577659166103058, "grad_norm": 6.206941750695732, "learning_rate": 5.921786566503384e-06, "loss": 17.4327, "step": 25043 }, { "epoch": 0.45778419580675234, "grad_norm": 8.247628077256223, "learning_rate": 5.921495627198537e-06, "loss": 18.1207, "step": 25044 }, { "epoch": 0.45780247500319887, "grad_norm": 5.742348481906552, "learning_rate": 5.921204684663927e-06, "loss": 17.5051, "step": 25045 }, { "epoch": 0.4578207541996454, "grad_norm": 7.0053822967011214, "learning_rate": 5.920913738900575e-06, "loss": 17.9018, "step": 25046 }, { "epoch": 0.4578390333960919, "grad_norm": 6.3120630670410005, "learning_rate": 5.920622789909499e-06, "loss": 17.2657, "step": 25047 }, { "epoch": 0.4578573125925384, "grad_norm": 6.638560846178379, "learning_rate": 5.920331837691722e-06, "loss": 17.3365, "step": 25048 }, { "epoch": 0.45787559178898496, "grad_norm": 6.913310899836669, "learning_rate": 5.920040882248261e-06, "loss": 17.3952, "step": 25049 }, { "epoch": 0.4578938709854315, "grad_norm": 5.8329902562283165, "learning_rate": 5.919749923580137e-06, "loss": 17.53, "step": 25050 }, { "epoch": 0.45791215018187803, "grad_norm": 7.880407501848141, "learning_rate": 5.919458961688368e-06, "loss": 17.8751, "step": 25051 }, { "epoch": 0.4579304293783245, "grad_norm": 7.8721008769038985, "learning_rate": 5.919167996573975e-06, "loss": 17.7918, "step": 25052 }, { "epoch": 0.45794870857477105, "grad_norm": 6.212387184352187, "learning_rate": 5.918877028237982e-06, "loss": 17.2153, "step": 25053 }, { "epoch": 0.4579669877712176, "grad_norm": 7.679853254937747, "learning_rate": 5.9185860566814005e-06, "loss": 18.0365, "step": 25054 }, { "epoch": 0.4579852669676641, "grad_norm": 5.9041637537375316, "learning_rate": 5.9182950819052554e-06, "loss": 16.9869, "step": 25055 }, { "epoch": 0.45800354616411065, "grad_norm": 7.22318367184783, "learning_rate": 5.9180041039105664e-06, "loss": 18.0856, "step": 25056 }, { "epoch": 0.45802182536055713, "grad_norm": 8.581666754043548, "learning_rate": 5.917713122698352e-06, "loss": 18.331, "step": 25057 }, { "epoch": 0.45804010455700367, "grad_norm": 5.827562596542903, "learning_rate": 5.9174221382696325e-06, "loss": 17.3273, "step": 25058 }, { "epoch": 0.4580583837534502, "grad_norm": 5.2291750829573775, "learning_rate": 5.917131150625431e-06, "loss": 17.1274, "step": 25059 }, { "epoch": 0.45807666294989674, "grad_norm": 7.002193119910955, "learning_rate": 5.91684015976676e-06, "loss": 17.8088, "step": 25060 }, { "epoch": 0.4580949421463432, "grad_norm": 5.064940999586588, "learning_rate": 5.916549165694646e-06, "loss": 16.8917, "step": 25061 }, { "epoch": 0.45811322134278976, "grad_norm": 6.07487078474585, "learning_rate": 5.9162581684101065e-06, "loss": 17.2801, "step": 25062 }, { "epoch": 0.4581315005392363, "grad_norm": 6.100885615395288, "learning_rate": 5.915967167914163e-06, "loss": 17.3161, "step": 25063 }, { "epoch": 0.4581497797356828, "grad_norm": 6.365595687647508, "learning_rate": 5.915676164207833e-06, "loss": 17.3906, "step": 25064 }, { "epoch": 0.45816805893212936, "grad_norm": 8.370558908105483, "learning_rate": 5.915385157292135e-06, "loss": 17.9775, "step": 25065 }, { "epoch": 0.45818633812857584, "grad_norm": 4.743868565626321, "learning_rate": 5.915094147168096e-06, "loss": 16.7813, "step": 25066 }, { "epoch": 0.4582046173250224, "grad_norm": 6.470216535288608, "learning_rate": 5.914803133836729e-06, "loss": 17.4377, "step": 25067 }, { "epoch": 0.4582228965214689, "grad_norm": 5.208946094185995, "learning_rate": 5.914512117299056e-06, "loss": 16.9007, "step": 25068 }, { "epoch": 0.45824117571791545, "grad_norm": 6.511039458565328, "learning_rate": 5.914221097556097e-06, "loss": 17.6099, "step": 25069 }, { "epoch": 0.458259454914362, "grad_norm": 6.199890150444937, "learning_rate": 5.913930074608873e-06, "loss": 17.3178, "step": 25070 }, { "epoch": 0.45827773411080847, "grad_norm": 6.396809693260093, "learning_rate": 5.913639048458404e-06, "loss": 17.4528, "step": 25071 }, { "epoch": 0.458296013307255, "grad_norm": 5.383985903555381, "learning_rate": 5.913348019105709e-06, "loss": 17.1753, "step": 25072 }, { "epoch": 0.45831429250370154, "grad_norm": 8.01317385967738, "learning_rate": 5.913056986551809e-06, "loss": 18.2772, "step": 25073 }, { "epoch": 0.4583325717001481, "grad_norm": 5.055525897880802, "learning_rate": 5.912765950797723e-06, "loss": 17.0403, "step": 25074 }, { "epoch": 0.4583508508965946, "grad_norm": 5.834932092271731, "learning_rate": 5.912474911844471e-06, "loss": 17.349, "step": 25075 }, { "epoch": 0.4583691300930411, "grad_norm": 7.3257768903772265, "learning_rate": 5.912183869693074e-06, "loss": 17.8751, "step": 25076 }, { "epoch": 0.4583874092894876, "grad_norm": 7.538587155687487, "learning_rate": 5.911892824344554e-06, "loss": 18.3766, "step": 25077 }, { "epoch": 0.45840568848593416, "grad_norm": 7.160764942228313, "learning_rate": 5.911601775799925e-06, "loss": 17.8963, "step": 25078 }, { "epoch": 0.4584239676823807, "grad_norm": 7.378645955279105, "learning_rate": 5.911310724060213e-06, "loss": 17.7232, "step": 25079 }, { "epoch": 0.45844224687882723, "grad_norm": 5.049835034722711, "learning_rate": 5.9110196691264365e-06, "loss": 17.0266, "step": 25080 }, { "epoch": 0.4584605260752737, "grad_norm": 6.34414887518242, "learning_rate": 5.9107286109996135e-06, "loss": 17.4845, "step": 25081 }, { "epoch": 0.45847880527172025, "grad_norm": 5.977932616193766, "learning_rate": 5.910437549680766e-06, "loss": 17.3735, "step": 25082 }, { "epoch": 0.4584970844681668, "grad_norm": 6.308358394565755, "learning_rate": 5.910146485170914e-06, "loss": 17.6364, "step": 25083 }, { "epoch": 0.4585153636646133, "grad_norm": 5.86124418007269, "learning_rate": 5.9098554174710785e-06, "loss": 17.3927, "step": 25084 }, { "epoch": 0.45853364286105985, "grad_norm": 6.138031504468722, "learning_rate": 5.909564346582279e-06, "loss": 17.458, "step": 25085 }, { "epoch": 0.45855192205750633, "grad_norm": 5.4553383569489755, "learning_rate": 5.909273272505534e-06, "loss": 17.2117, "step": 25086 }, { "epoch": 0.45857020125395287, "grad_norm": 7.030659033261773, "learning_rate": 5.908982195241865e-06, "loss": 17.5818, "step": 25087 }, { "epoch": 0.4585884804503994, "grad_norm": 5.483053980848989, "learning_rate": 5.908691114792293e-06, "loss": 17.0663, "step": 25088 }, { "epoch": 0.45860675964684594, "grad_norm": 6.1136181010802275, "learning_rate": 5.908400031157837e-06, "loss": 17.1744, "step": 25089 }, { "epoch": 0.4586250388432925, "grad_norm": 5.69582017469816, "learning_rate": 5.908108944339519e-06, "loss": 17.0884, "step": 25090 }, { "epoch": 0.45864331803973896, "grad_norm": 5.736528975665548, "learning_rate": 5.907817854338357e-06, "loss": 17.1045, "step": 25091 }, { "epoch": 0.4586615972361855, "grad_norm": 7.6264465897051, "learning_rate": 5.907526761155371e-06, "loss": 17.871, "step": 25092 }, { "epoch": 0.458679876432632, "grad_norm": 6.128675020624806, "learning_rate": 5.907235664791583e-06, "loss": 17.5724, "step": 25093 }, { "epoch": 0.45869815562907856, "grad_norm": 5.955904531388647, "learning_rate": 5.906944565248015e-06, "loss": 17.3476, "step": 25094 }, { "epoch": 0.45871643482552504, "grad_norm": 7.08895749649592, "learning_rate": 5.9066534625256836e-06, "loss": 17.7101, "step": 25095 }, { "epoch": 0.4587347140219716, "grad_norm": 4.708930776386075, "learning_rate": 5.90636235662561e-06, "loss": 16.7383, "step": 25096 }, { "epoch": 0.4587529932184181, "grad_norm": 7.568027262060807, "learning_rate": 5.906071247548814e-06, "loss": 18.0702, "step": 25097 }, { "epoch": 0.45877127241486465, "grad_norm": 6.160265851589954, "learning_rate": 5.90578013529632e-06, "loss": 17.3222, "step": 25098 }, { "epoch": 0.4587895516113112, "grad_norm": 5.221119349192646, "learning_rate": 5.905489019869142e-06, "loss": 16.8484, "step": 25099 }, { "epoch": 0.45880783080775767, "grad_norm": 5.777141960437828, "learning_rate": 5.905197901268305e-06, "loss": 17.3222, "step": 25100 }, { "epoch": 0.4588261100042042, "grad_norm": 6.447375505580861, "learning_rate": 5.9049067794948275e-06, "loss": 17.2996, "step": 25101 }, { "epoch": 0.45884438920065074, "grad_norm": 6.997710347061462, "learning_rate": 5.904615654549732e-06, "loss": 17.3454, "step": 25102 }, { "epoch": 0.4588626683970973, "grad_norm": 7.18329244941718, "learning_rate": 5.904324526434035e-06, "loss": 17.8857, "step": 25103 }, { "epoch": 0.4588809475935438, "grad_norm": 6.85829402999007, "learning_rate": 5.904033395148761e-06, "loss": 17.6004, "step": 25104 }, { "epoch": 0.4588992267899903, "grad_norm": 6.094213590941932, "learning_rate": 5.903742260694926e-06, "loss": 17.2283, "step": 25105 }, { "epoch": 0.4589175059864368, "grad_norm": 7.3716304976459135, "learning_rate": 5.903451123073554e-06, "loss": 17.8677, "step": 25106 }, { "epoch": 0.45893578518288336, "grad_norm": 5.238332336507377, "learning_rate": 5.903159982285663e-06, "loss": 17.0321, "step": 25107 }, { "epoch": 0.4589540643793299, "grad_norm": 6.935188174699001, "learning_rate": 5.902868838332277e-06, "loss": 17.7228, "step": 25108 }, { "epoch": 0.45897234357577643, "grad_norm": 6.557339988151756, "learning_rate": 5.9025776912144125e-06, "loss": 17.7469, "step": 25109 }, { "epoch": 0.4589906227722229, "grad_norm": 7.055907107137074, "learning_rate": 5.902286540933091e-06, "loss": 17.8525, "step": 25110 }, { "epoch": 0.45900890196866945, "grad_norm": 5.214122725260912, "learning_rate": 5.901995387489335e-06, "loss": 17.1292, "step": 25111 }, { "epoch": 0.459027181165116, "grad_norm": 6.4066059822303485, "learning_rate": 5.9017042308841635e-06, "loss": 17.475, "step": 25112 }, { "epoch": 0.4590454603615625, "grad_norm": 6.777180689280732, "learning_rate": 5.901413071118596e-06, "loss": 17.4952, "step": 25113 }, { "epoch": 0.45906373955800905, "grad_norm": 6.146671576227362, "learning_rate": 5.901121908193654e-06, "loss": 17.554, "step": 25114 }, { "epoch": 0.45908201875445553, "grad_norm": 6.734242849027381, "learning_rate": 5.900830742110358e-06, "loss": 17.8993, "step": 25115 }, { "epoch": 0.45910029795090207, "grad_norm": 5.438781339591436, "learning_rate": 5.900539572869728e-06, "loss": 17.2216, "step": 25116 }, { "epoch": 0.4591185771473486, "grad_norm": 5.51344483745984, "learning_rate": 5.900248400472786e-06, "loss": 17.3257, "step": 25117 }, { "epoch": 0.45913685634379514, "grad_norm": 6.841229462459413, "learning_rate": 5.899957224920551e-06, "loss": 17.4026, "step": 25118 }, { "epoch": 0.4591551355402417, "grad_norm": 7.015296903704281, "learning_rate": 5.899666046214043e-06, "loss": 17.751, "step": 25119 }, { "epoch": 0.45917341473668816, "grad_norm": 6.198221657390064, "learning_rate": 5.899374864354284e-06, "loss": 17.2304, "step": 25120 }, { "epoch": 0.4591916939331347, "grad_norm": 5.665737824164518, "learning_rate": 5.899083679342296e-06, "loss": 17.0227, "step": 25121 }, { "epoch": 0.45920997312958123, "grad_norm": 7.453643756704126, "learning_rate": 5.898792491179096e-06, "loss": 17.9597, "step": 25122 }, { "epoch": 0.45922825232602776, "grad_norm": 8.077937538556672, "learning_rate": 5.898501299865707e-06, "loss": 18.109, "step": 25123 }, { "epoch": 0.4592465315224743, "grad_norm": 8.610773605697204, "learning_rate": 5.898210105403147e-06, "loss": 18.0171, "step": 25124 }, { "epoch": 0.4592648107189208, "grad_norm": 6.443037608981733, "learning_rate": 5.897918907792442e-06, "loss": 17.3407, "step": 25125 }, { "epoch": 0.4592830899153673, "grad_norm": 6.200238922158968, "learning_rate": 5.897627707034606e-06, "loss": 17.6508, "step": 25126 }, { "epoch": 0.45930136911181385, "grad_norm": 9.187035784712638, "learning_rate": 5.897336503130664e-06, "loss": 18.0105, "step": 25127 }, { "epoch": 0.4593196483082604, "grad_norm": 7.111063903992415, "learning_rate": 5.897045296081636e-06, "loss": 17.4023, "step": 25128 }, { "epoch": 0.45933792750470687, "grad_norm": 6.1989517543475054, "learning_rate": 5.896754085888541e-06, "loss": 17.3739, "step": 25129 }, { "epoch": 0.4593562067011534, "grad_norm": 5.6610742089571335, "learning_rate": 5.896462872552401e-06, "loss": 17.3181, "step": 25130 }, { "epoch": 0.45937448589759994, "grad_norm": 6.512710182304373, "learning_rate": 5.896171656074237e-06, "loss": 17.3586, "step": 25131 }, { "epoch": 0.4593927650940465, "grad_norm": 6.210866625513039, "learning_rate": 5.895880436455068e-06, "loss": 17.3805, "step": 25132 }, { "epoch": 0.459411044290493, "grad_norm": 6.937713023128045, "learning_rate": 5.895589213695917e-06, "loss": 17.7345, "step": 25133 }, { "epoch": 0.4594293234869395, "grad_norm": 9.909806208021777, "learning_rate": 5.895297987797803e-06, "loss": 17.8982, "step": 25134 }, { "epoch": 0.459447602683386, "grad_norm": 5.917635441195128, "learning_rate": 5.895006758761749e-06, "loss": 17.294, "step": 25135 }, { "epoch": 0.45946588187983256, "grad_norm": 6.053944887018149, "learning_rate": 5.894715526588771e-06, "loss": 17.3829, "step": 25136 }, { "epoch": 0.4594841610762791, "grad_norm": 6.8268018748677575, "learning_rate": 5.8944242912798935e-06, "loss": 17.483, "step": 25137 }, { "epoch": 0.45950244027272563, "grad_norm": 5.369844108200249, "learning_rate": 5.894133052836138e-06, "loss": 17.2398, "step": 25138 }, { "epoch": 0.4595207194691721, "grad_norm": 13.63973965013231, "learning_rate": 5.8938418112585225e-06, "loss": 18.634, "step": 25139 }, { "epoch": 0.45953899866561865, "grad_norm": 5.121883898150083, "learning_rate": 5.8935505665480695e-06, "loss": 17.051, "step": 25140 }, { "epoch": 0.4595572778620652, "grad_norm": 6.89128015628205, "learning_rate": 5.893259318705799e-06, "loss": 17.4457, "step": 25141 }, { "epoch": 0.4595755570585117, "grad_norm": 6.989561441812156, "learning_rate": 5.892968067732731e-06, "loss": 17.7903, "step": 25142 }, { "epoch": 0.45959383625495825, "grad_norm": 5.770252141612089, "learning_rate": 5.892676813629889e-06, "loss": 17.1302, "step": 25143 }, { "epoch": 0.45961211545140473, "grad_norm": 6.059587407676246, "learning_rate": 5.892385556398292e-06, "loss": 17.501, "step": 25144 }, { "epoch": 0.45963039464785127, "grad_norm": 6.491129225958094, "learning_rate": 5.892094296038961e-06, "loss": 17.6745, "step": 25145 }, { "epoch": 0.4596486738442978, "grad_norm": 7.600274561858066, "learning_rate": 5.891803032552916e-06, "loss": 17.8503, "step": 25146 }, { "epoch": 0.45966695304074434, "grad_norm": 7.128989380532426, "learning_rate": 5.89151176594118e-06, "loss": 17.8868, "step": 25147 }, { "epoch": 0.4596852322371909, "grad_norm": 6.210565817815751, "learning_rate": 5.891220496204772e-06, "loss": 17.5276, "step": 25148 }, { "epoch": 0.45970351143363736, "grad_norm": 5.885705935678427, "learning_rate": 5.890929223344715e-06, "loss": 17.3678, "step": 25149 }, { "epoch": 0.4597217906300839, "grad_norm": 6.667881055100639, "learning_rate": 5.890637947362027e-06, "loss": 17.8547, "step": 25150 }, { "epoch": 0.45974006982653043, "grad_norm": 6.99481690001039, "learning_rate": 5.890346668257729e-06, "loss": 17.7187, "step": 25151 }, { "epoch": 0.45975834902297696, "grad_norm": 7.695289062844708, "learning_rate": 5.890055386032845e-06, "loss": 18.2169, "step": 25152 }, { "epoch": 0.4597766282194235, "grad_norm": 6.9050078947645925, "learning_rate": 5.889764100688394e-06, "loss": 17.721, "step": 25153 }, { "epoch": 0.45979490741587, "grad_norm": 6.58402216988812, "learning_rate": 5.8894728122253965e-06, "loss": 17.4457, "step": 25154 }, { "epoch": 0.4598131866123165, "grad_norm": 6.988560929739352, "learning_rate": 5.889181520644874e-06, "loss": 17.2455, "step": 25155 }, { "epoch": 0.45983146580876305, "grad_norm": 6.206599477481273, "learning_rate": 5.888890225947848e-06, "loss": 17.4213, "step": 25156 }, { "epoch": 0.4598497450052096, "grad_norm": 5.6008437350115985, "learning_rate": 5.888598928135338e-06, "loss": 17.0786, "step": 25157 }, { "epoch": 0.4598680242016561, "grad_norm": 7.791893690638781, "learning_rate": 5.888307627208366e-06, "loss": 18.0871, "step": 25158 }, { "epoch": 0.4598863033981026, "grad_norm": 5.786612455520085, "learning_rate": 5.888016323167954e-06, "loss": 17.1307, "step": 25159 }, { "epoch": 0.45990458259454914, "grad_norm": 6.026676468863097, "learning_rate": 5.88772501601512e-06, "loss": 17.2374, "step": 25160 }, { "epoch": 0.4599228617909957, "grad_norm": 9.161378002187389, "learning_rate": 5.887433705750889e-06, "loss": 18.0735, "step": 25161 }, { "epoch": 0.4599411409874422, "grad_norm": 7.409644025610671, "learning_rate": 5.887142392376279e-06, "loss": 17.5688, "step": 25162 }, { "epoch": 0.4599594201838887, "grad_norm": 5.614609338578762, "learning_rate": 5.886851075892311e-06, "loss": 17.2005, "step": 25163 }, { "epoch": 0.4599776993803352, "grad_norm": 6.605912678841178, "learning_rate": 5.886559756300008e-06, "loss": 17.5157, "step": 25164 }, { "epoch": 0.45999597857678176, "grad_norm": 4.981666970189918, "learning_rate": 5.886268433600388e-06, "loss": 16.7706, "step": 25165 }, { "epoch": 0.4600142577732283, "grad_norm": 6.723853799327433, "learning_rate": 5.885977107794477e-06, "loss": 17.6284, "step": 25166 }, { "epoch": 0.46003253696967483, "grad_norm": 4.767315444881347, "learning_rate": 5.885685778883292e-06, "loss": 16.9863, "step": 25167 }, { "epoch": 0.4600508161661213, "grad_norm": 5.883460762423431, "learning_rate": 5.885394446867855e-06, "loss": 17.5723, "step": 25168 }, { "epoch": 0.46006909536256785, "grad_norm": 6.7623460547856915, "learning_rate": 5.885103111749186e-06, "loss": 17.5193, "step": 25169 }, { "epoch": 0.4600873745590144, "grad_norm": 5.578138536650387, "learning_rate": 5.884811773528309e-06, "loss": 17.1289, "step": 25170 }, { "epoch": 0.4601056537554609, "grad_norm": 7.633891638786964, "learning_rate": 5.884520432206243e-06, "loss": 17.5063, "step": 25171 }, { "epoch": 0.46012393295190746, "grad_norm": 7.294758059690856, "learning_rate": 5.88422908778401e-06, "loss": 17.6058, "step": 25172 }, { "epoch": 0.46014221214835394, "grad_norm": 6.53870452695131, "learning_rate": 5.883937740262631e-06, "loss": 17.7105, "step": 25173 }, { "epoch": 0.46016049134480047, "grad_norm": 7.234411483567285, "learning_rate": 5.883646389643126e-06, "loss": 17.8144, "step": 25174 }, { "epoch": 0.460178770541247, "grad_norm": 5.87520245327371, "learning_rate": 5.883355035926518e-06, "loss": 17.2566, "step": 25175 }, { "epoch": 0.46019704973769354, "grad_norm": 7.127310428675387, "learning_rate": 5.8830636791138265e-06, "loss": 17.3071, "step": 25176 }, { "epoch": 0.4602153289341401, "grad_norm": 7.0579754506303205, "learning_rate": 5.8827723192060745e-06, "loss": 17.7469, "step": 25177 }, { "epoch": 0.46023360813058656, "grad_norm": 7.4672521690141584, "learning_rate": 5.882480956204281e-06, "loss": 18.2015, "step": 25178 }, { "epoch": 0.4602518873270331, "grad_norm": 5.607334949849727, "learning_rate": 5.882189590109468e-06, "loss": 17.1994, "step": 25179 }, { "epoch": 0.46027016652347963, "grad_norm": 6.617843704706007, "learning_rate": 5.881898220922658e-06, "loss": 17.7119, "step": 25180 }, { "epoch": 0.46028844571992616, "grad_norm": 8.233597757977755, "learning_rate": 5.881606848644872e-06, "loss": 18.2433, "step": 25181 }, { "epoch": 0.4603067249163727, "grad_norm": 6.630516398476976, "learning_rate": 5.881315473277129e-06, "loss": 17.459, "step": 25182 }, { "epoch": 0.4603250041128192, "grad_norm": 6.928581051265842, "learning_rate": 5.881024094820451e-06, "loss": 17.697, "step": 25183 }, { "epoch": 0.4603432833092657, "grad_norm": 6.309244438724938, "learning_rate": 5.880732713275863e-06, "loss": 17.4451, "step": 25184 }, { "epoch": 0.46036156250571225, "grad_norm": 10.785039141572657, "learning_rate": 5.880441328644381e-06, "loss": 17.9353, "step": 25185 }, { "epoch": 0.4603798417021588, "grad_norm": 5.6893484008183135, "learning_rate": 5.880149940927029e-06, "loss": 17.4092, "step": 25186 }, { "epoch": 0.4603981208986053, "grad_norm": 12.42222377509774, "learning_rate": 5.879858550124827e-06, "loss": 17.9524, "step": 25187 }, { "epoch": 0.4604164000950518, "grad_norm": 6.33427230977597, "learning_rate": 5.879567156238799e-06, "loss": 17.2989, "step": 25188 }, { "epoch": 0.46043467929149834, "grad_norm": 7.729890168576504, "learning_rate": 5.879275759269963e-06, "loss": 18.2629, "step": 25189 }, { "epoch": 0.4604529584879449, "grad_norm": 7.295201757649203, "learning_rate": 5.878984359219343e-06, "loss": 17.7452, "step": 25190 }, { "epoch": 0.4604712376843914, "grad_norm": 6.160423920275361, "learning_rate": 5.878692956087959e-06, "loss": 17.3426, "step": 25191 }, { "epoch": 0.46048951688083795, "grad_norm": 8.42585663647094, "learning_rate": 5.87840154987683e-06, "loss": 18.0182, "step": 25192 }, { "epoch": 0.4605077960772844, "grad_norm": 7.534976925029885, "learning_rate": 5.878110140586981e-06, "loss": 18.3795, "step": 25193 }, { "epoch": 0.46052607527373096, "grad_norm": 7.314838726838559, "learning_rate": 5.877818728219434e-06, "loss": 17.5525, "step": 25194 }, { "epoch": 0.4605443544701775, "grad_norm": 5.671522377539959, "learning_rate": 5.877527312775207e-06, "loss": 17.1415, "step": 25195 }, { "epoch": 0.46056263366662403, "grad_norm": 6.277055034352918, "learning_rate": 5.877235894255323e-06, "loss": 17.2759, "step": 25196 }, { "epoch": 0.4605809128630705, "grad_norm": 5.267296296698131, "learning_rate": 5.876944472660803e-06, "loss": 17.0471, "step": 25197 }, { "epoch": 0.46059919205951705, "grad_norm": 7.532153553065105, "learning_rate": 5.87665304799267e-06, "loss": 18.3739, "step": 25198 }, { "epoch": 0.4606174712559636, "grad_norm": 6.211914389122407, "learning_rate": 5.8763616202519435e-06, "loss": 17.4008, "step": 25199 }, { "epoch": 0.4606357504524101, "grad_norm": 4.745888638877521, "learning_rate": 5.876070189439645e-06, "loss": 16.9409, "step": 25200 }, { "epoch": 0.46065402964885666, "grad_norm": 7.994165483051935, "learning_rate": 5.875778755556797e-06, "loss": 18.1199, "step": 25201 }, { "epoch": 0.46067230884530314, "grad_norm": 7.676489971327513, "learning_rate": 5.8754873186044205e-06, "loss": 17.8826, "step": 25202 }, { "epoch": 0.46069058804174967, "grad_norm": 6.803787940231963, "learning_rate": 5.875195878583536e-06, "loss": 17.6777, "step": 25203 }, { "epoch": 0.4607088672381962, "grad_norm": 6.612945588168071, "learning_rate": 5.874904435495168e-06, "loss": 17.4038, "step": 25204 }, { "epoch": 0.46072714643464274, "grad_norm": 5.49287347466228, "learning_rate": 5.874612989340334e-06, "loss": 16.9628, "step": 25205 }, { "epoch": 0.4607454256310893, "grad_norm": 6.219369992367084, "learning_rate": 5.874321540120057e-06, "loss": 17.4567, "step": 25206 }, { "epoch": 0.46076370482753576, "grad_norm": 5.8198794907322515, "learning_rate": 5.87403008783536e-06, "loss": 17.366, "step": 25207 }, { "epoch": 0.4607819840239823, "grad_norm": 7.891903307016829, "learning_rate": 5.873738632487265e-06, "loss": 18.1387, "step": 25208 }, { "epoch": 0.46080026322042883, "grad_norm": 5.462459582829687, "learning_rate": 5.873447174076789e-06, "loss": 16.8464, "step": 25209 }, { "epoch": 0.46081854241687537, "grad_norm": 5.379997424751426, "learning_rate": 5.873155712604956e-06, "loss": 16.9951, "step": 25210 }, { "epoch": 0.4608368216133219, "grad_norm": 5.334205765095013, "learning_rate": 5.8728642480727915e-06, "loss": 17.0456, "step": 25211 }, { "epoch": 0.4608551008097684, "grad_norm": 6.22783642163609, "learning_rate": 5.8725727804813115e-06, "loss": 17.4138, "step": 25212 }, { "epoch": 0.4608733800062149, "grad_norm": 4.8363785858796575, "learning_rate": 5.87228130983154e-06, "loss": 16.8598, "step": 25213 }, { "epoch": 0.46089165920266145, "grad_norm": 7.366645808860496, "learning_rate": 5.871989836124498e-06, "loss": 17.8513, "step": 25214 }, { "epoch": 0.460909938399108, "grad_norm": 6.301782432938622, "learning_rate": 5.871698359361207e-06, "loss": 17.4138, "step": 25215 }, { "epoch": 0.4609282175955545, "grad_norm": 6.958742772704289, "learning_rate": 5.871406879542688e-06, "loss": 17.8689, "step": 25216 }, { "epoch": 0.460946496792001, "grad_norm": 6.499677068371723, "learning_rate": 5.871115396669965e-06, "loss": 17.2869, "step": 25217 }, { "epoch": 0.46096477598844754, "grad_norm": 6.879665208187112, "learning_rate": 5.870823910744059e-06, "loss": 17.3627, "step": 25218 }, { "epoch": 0.4609830551848941, "grad_norm": 7.381367835499502, "learning_rate": 5.8705324217659886e-06, "loss": 17.6932, "step": 25219 }, { "epoch": 0.4610013343813406, "grad_norm": 5.87604450242848, "learning_rate": 5.870240929736778e-06, "loss": 17.2681, "step": 25220 }, { "epoch": 0.46101961357778715, "grad_norm": 7.899233988968155, "learning_rate": 5.869949434657449e-06, "loss": 17.809, "step": 25221 }, { "epoch": 0.4610378927742336, "grad_norm": 5.016263511686514, "learning_rate": 5.869657936529023e-06, "loss": 16.9404, "step": 25222 }, { "epoch": 0.46105617197068016, "grad_norm": 6.545002506397308, "learning_rate": 5.869366435352521e-06, "loss": 17.3206, "step": 25223 }, { "epoch": 0.4610744511671267, "grad_norm": 7.149939246215548, "learning_rate": 5.869074931128964e-06, "loss": 17.4842, "step": 25224 }, { "epoch": 0.46109273036357323, "grad_norm": 6.060316208116345, "learning_rate": 5.868783423859378e-06, "loss": 17.4576, "step": 25225 }, { "epoch": 0.46111100956001977, "grad_norm": 5.562265651496669, "learning_rate": 5.868491913544779e-06, "loss": 17.0999, "step": 25226 }, { "epoch": 0.46112928875646625, "grad_norm": 6.356959184287293, "learning_rate": 5.868200400186191e-06, "loss": 17.4441, "step": 25227 }, { "epoch": 0.4611475679529128, "grad_norm": 6.443925494825827, "learning_rate": 5.867908883784637e-06, "loss": 17.124, "step": 25228 }, { "epoch": 0.4611658471493593, "grad_norm": 6.061029860344893, "learning_rate": 5.867617364341137e-06, "loss": 17.1974, "step": 25229 }, { "epoch": 0.46118412634580586, "grad_norm": 5.775275949768164, "learning_rate": 5.8673258418567134e-06, "loss": 17.0047, "step": 25230 }, { "epoch": 0.46120240554225234, "grad_norm": 5.633981491550323, "learning_rate": 5.867034316332389e-06, "loss": 17.2366, "step": 25231 }, { "epoch": 0.46122068473869887, "grad_norm": 6.859929030125392, "learning_rate": 5.8667427877691825e-06, "loss": 17.5946, "step": 25232 }, { "epoch": 0.4612389639351454, "grad_norm": 7.569064890538536, "learning_rate": 5.86645125616812e-06, "loss": 17.7965, "step": 25233 }, { "epoch": 0.46125724313159194, "grad_norm": 6.642838065807771, "learning_rate": 5.86615972153022e-06, "loss": 17.7194, "step": 25234 }, { "epoch": 0.4612755223280385, "grad_norm": 6.321309871406881, "learning_rate": 5.8658681838565065e-06, "loss": 17.1463, "step": 25235 }, { "epoch": 0.46129380152448496, "grad_norm": 6.560191692175135, "learning_rate": 5.865576643147999e-06, "loss": 17.7838, "step": 25236 }, { "epoch": 0.4613120807209315, "grad_norm": 10.608667819433098, "learning_rate": 5.8652850994057184e-06, "loss": 17.9546, "step": 25237 }, { "epoch": 0.46133035991737803, "grad_norm": 5.644111811817615, "learning_rate": 5.8649935526306915e-06, "loss": 17.4048, "step": 25238 }, { "epoch": 0.46134863911382457, "grad_norm": 5.340542725307962, "learning_rate": 5.864702002823938e-06, "loss": 17.1213, "step": 25239 }, { "epoch": 0.4613669183102711, "grad_norm": 6.584077175225929, "learning_rate": 5.864410449986478e-06, "loss": 17.4067, "step": 25240 }, { "epoch": 0.4613851975067176, "grad_norm": 7.2018494393832295, "learning_rate": 5.864118894119333e-06, "loss": 17.9102, "step": 25241 }, { "epoch": 0.4614034767031641, "grad_norm": 5.720497312017708, "learning_rate": 5.863827335223526e-06, "loss": 17.1027, "step": 25242 }, { "epoch": 0.46142175589961065, "grad_norm": 6.62557114176098, "learning_rate": 5.863535773300081e-06, "loss": 17.6402, "step": 25243 }, { "epoch": 0.4614400350960572, "grad_norm": 6.843530139692724, "learning_rate": 5.863244208350017e-06, "loss": 17.7313, "step": 25244 }, { "epoch": 0.4614583142925037, "grad_norm": 6.098550955711443, "learning_rate": 5.862952640374358e-06, "loss": 17.4683, "step": 25245 }, { "epoch": 0.4614765934889502, "grad_norm": 5.8955470014577465, "learning_rate": 5.862661069374123e-06, "loss": 17.3704, "step": 25246 }, { "epoch": 0.46149487268539674, "grad_norm": 5.348734500097625, "learning_rate": 5.862369495350337e-06, "loss": 16.8836, "step": 25247 }, { "epoch": 0.4615131518818433, "grad_norm": 7.734146139843284, "learning_rate": 5.862077918304021e-06, "loss": 18.1261, "step": 25248 }, { "epoch": 0.4615314310782898, "grad_norm": 6.757837059362166, "learning_rate": 5.861786338236198e-06, "loss": 17.9775, "step": 25249 }, { "epoch": 0.46154971027473635, "grad_norm": 7.728993264104784, "learning_rate": 5.861494755147887e-06, "loss": 18.1239, "step": 25250 }, { "epoch": 0.4615679894711828, "grad_norm": 5.6011239945637, "learning_rate": 5.86120316904011e-06, "loss": 17.2771, "step": 25251 }, { "epoch": 0.46158626866762936, "grad_norm": 6.037583735673573, "learning_rate": 5.8609115799138925e-06, "loss": 17.5366, "step": 25252 }, { "epoch": 0.4616045478640759, "grad_norm": 7.21085759175853, "learning_rate": 5.860619987770256e-06, "loss": 18.182, "step": 25253 }, { "epoch": 0.46162282706052243, "grad_norm": 6.658784762118231, "learning_rate": 5.86032839261022e-06, "loss": 17.7426, "step": 25254 }, { "epoch": 0.46164110625696897, "grad_norm": 5.8194026285300415, "learning_rate": 5.860036794434807e-06, "loss": 17.4987, "step": 25255 }, { "epoch": 0.46165938545341545, "grad_norm": 6.869381633538686, "learning_rate": 5.85974519324504e-06, "loss": 17.8578, "step": 25256 }, { "epoch": 0.461677664649862, "grad_norm": 6.092164871274417, "learning_rate": 5.8594535890419405e-06, "loss": 17.3048, "step": 25257 }, { "epoch": 0.4616959438463085, "grad_norm": 5.755045555872947, "learning_rate": 5.859161981826531e-06, "loss": 17.2976, "step": 25258 }, { "epoch": 0.46171422304275506, "grad_norm": 6.588632099795179, "learning_rate": 5.858870371599833e-06, "loss": 17.2774, "step": 25259 }, { "epoch": 0.4617325022392016, "grad_norm": 6.464079790384099, "learning_rate": 5.858578758362869e-06, "loss": 17.6903, "step": 25260 }, { "epoch": 0.46175078143564807, "grad_norm": 6.743054818292839, "learning_rate": 5.858287142116661e-06, "loss": 17.5171, "step": 25261 }, { "epoch": 0.4617690606320946, "grad_norm": 8.15978190782185, "learning_rate": 5.8579955228622305e-06, "loss": 17.9251, "step": 25262 }, { "epoch": 0.46178733982854114, "grad_norm": 7.962239552499945, "learning_rate": 5.857703900600602e-06, "loss": 17.6902, "step": 25263 }, { "epoch": 0.4618056190249877, "grad_norm": 6.622267708487954, "learning_rate": 5.857412275332795e-06, "loss": 17.5825, "step": 25264 }, { "epoch": 0.46182389822143416, "grad_norm": 6.302864673146543, "learning_rate": 5.8571206470598304e-06, "loss": 17.3838, "step": 25265 }, { "epoch": 0.4618421774178807, "grad_norm": 6.524106147421126, "learning_rate": 5.856829015782734e-06, "loss": 17.4938, "step": 25266 }, { "epoch": 0.46186045661432723, "grad_norm": 6.8560153214061295, "learning_rate": 5.856537381502527e-06, "loss": 17.8109, "step": 25267 }, { "epoch": 0.46187873581077377, "grad_norm": 5.7603696737380785, "learning_rate": 5.85624574422023e-06, "loss": 17.306, "step": 25268 }, { "epoch": 0.4618970150072203, "grad_norm": 6.245738680757659, "learning_rate": 5.8559541039368654e-06, "loss": 17.4456, "step": 25269 }, { "epoch": 0.4619152942036668, "grad_norm": 9.23885331020808, "learning_rate": 5.855662460653457e-06, "loss": 17.7326, "step": 25270 }, { "epoch": 0.4619335734001133, "grad_norm": 4.698139770850545, "learning_rate": 5.855370814371024e-06, "loss": 16.9017, "step": 25271 }, { "epoch": 0.46195185259655985, "grad_norm": 6.815264462425667, "learning_rate": 5.8550791650905925e-06, "loss": 17.996, "step": 25272 }, { "epoch": 0.4619701317930064, "grad_norm": 5.414622819904023, "learning_rate": 5.854787512813183e-06, "loss": 16.9644, "step": 25273 }, { "epoch": 0.4619884109894529, "grad_norm": 8.818899491645826, "learning_rate": 5.854495857539816e-06, "loss": 18.2126, "step": 25274 }, { "epoch": 0.4620066901858994, "grad_norm": 8.150483084597855, "learning_rate": 5.854204199271515e-06, "loss": 17.9843, "step": 25275 }, { "epoch": 0.46202496938234594, "grad_norm": 7.73954821080085, "learning_rate": 5.853912538009303e-06, "loss": 18.2651, "step": 25276 }, { "epoch": 0.4620432485787925, "grad_norm": 6.402496847467016, "learning_rate": 5.853620873754202e-06, "loss": 17.48, "step": 25277 }, { "epoch": 0.462061527775239, "grad_norm": 6.084215762293935, "learning_rate": 5.853329206507234e-06, "loss": 17.6089, "step": 25278 }, { "epoch": 0.46207980697168555, "grad_norm": 7.669596020238035, "learning_rate": 5.85303753626942e-06, "loss": 18.0464, "step": 25279 }, { "epoch": 0.462098086168132, "grad_norm": 7.3662375825774316, "learning_rate": 5.852745863041786e-06, "loss": 17.9197, "step": 25280 }, { "epoch": 0.46211636536457856, "grad_norm": 7.026272663659752, "learning_rate": 5.85245418682535e-06, "loss": 17.8691, "step": 25281 }, { "epoch": 0.4621346445610251, "grad_norm": 5.535689058701501, "learning_rate": 5.852162507621135e-06, "loss": 17.3449, "step": 25282 }, { "epoch": 0.46215292375747163, "grad_norm": 6.74302090668274, "learning_rate": 5.851870825430165e-06, "loss": 17.4523, "step": 25283 }, { "epoch": 0.46217120295391817, "grad_norm": 6.12401175446677, "learning_rate": 5.851579140253463e-06, "loss": 17.1992, "step": 25284 }, { "epoch": 0.46218948215036465, "grad_norm": 5.439906872526443, "learning_rate": 5.851287452092048e-06, "loss": 17.3654, "step": 25285 }, { "epoch": 0.4622077613468112, "grad_norm": 7.301342799027574, "learning_rate": 5.850995760946946e-06, "loss": 17.372, "step": 25286 }, { "epoch": 0.4622260405432577, "grad_norm": 7.031633964986034, "learning_rate": 5.850704066819177e-06, "loss": 17.9206, "step": 25287 }, { "epoch": 0.46224431973970426, "grad_norm": 5.336220084573948, "learning_rate": 5.850412369709764e-06, "loss": 17.0533, "step": 25288 }, { "epoch": 0.4622625989361508, "grad_norm": 5.539355997825748, "learning_rate": 5.8501206696197296e-06, "loss": 17.2112, "step": 25289 }, { "epoch": 0.4622808781325973, "grad_norm": 7.1189937992224275, "learning_rate": 5.849828966550098e-06, "loss": 17.7607, "step": 25290 }, { "epoch": 0.4622991573290438, "grad_norm": 6.133941928886113, "learning_rate": 5.849537260501886e-06, "loss": 17.5482, "step": 25291 }, { "epoch": 0.46231743652549034, "grad_norm": 6.225187836437039, "learning_rate": 5.849245551476122e-06, "loss": 17.4509, "step": 25292 }, { "epoch": 0.4623357157219369, "grad_norm": 7.658657985499206, "learning_rate": 5.8489538394738245e-06, "loss": 17.999, "step": 25293 }, { "epoch": 0.4623539949183834, "grad_norm": 5.98802306611786, "learning_rate": 5.84866212449602e-06, "loss": 17.285, "step": 25294 }, { "epoch": 0.4623722741148299, "grad_norm": 7.570375819435523, "learning_rate": 5.848370406543727e-06, "loss": 18.1253, "step": 25295 }, { "epoch": 0.46239055331127643, "grad_norm": 8.033185384736198, "learning_rate": 5.848078685617967e-06, "loss": 18.0514, "step": 25296 }, { "epoch": 0.46240883250772297, "grad_norm": 6.587730052328041, "learning_rate": 5.847786961719768e-06, "loss": 17.3455, "step": 25297 }, { "epoch": 0.4624271117041695, "grad_norm": 7.186198500662436, "learning_rate": 5.847495234850148e-06, "loss": 18.0893, "step": 25298 }, { "epoch": 0.462445390900616, "grad_norm": 6.613017319394958, "learning_rate": 5.8472035050101305e-06, "loss": 17.8159, "step": 25299 }, { "epoch": 0.4624636700970625, "grad_norm": 7.119548517905186, "learning_rate": 5.846911772200738e-06, "loss": 18.079, "step": 25300 }, { "epoch": 0.46248194929350905, "grad_norm": 5.0024120341206295, "learning_rate": 5.846620036422994e-06, "loss": 16.9131, "step": 25301 }, { "epoch": 0.4625002284899556, "grad_norm": 5.990592914536484, "learning_rate": 5.84632829767792e-06, "loss": 17.4368, "step": 25302 }, { "epoch": 0.4625185076864021, "grad_norm": 6.52735317668258, "learning_rate": 5.8460365559665385e-06, "loss": 17.2994, "step": 25303 }, { "epoch": 0.4625367868828486, "grad_norm": 6.455628074021686, "learning_rate": 5.845744811289874e-06, "loss": 17.6648, "step": 25304 }, { "epoch": 0.46255506607929514, "grad_norm": 7.138957096937389, "learning_rate": 5.845453063648945e-06, "loss": 17.9178, "step": 25305 }, { "epoch": 0.4625733452757417, "grad_norm": 6.196540200439552, "learning_rate": 5.845161313044777e-06, "loss": 17.5285, "step": 25306 }, { "epoch": 0.4625916244721882, "grad_norm": 6.210113427208343, "learning_rate": 5.844869559478392e-06, "loss": 17.61, "step": 25307 }, { "epoch": 0.46260990366863475, "grad_norm": 7.13312957663764, "learning_rate": 5.844577802950815e-06, "loss": 17.6366, "step": 25308 }, { "epoch": 0.46262818286508123, "grad_norm": 6.990945895824653, "learning_rate": 5.844286043463063e-06, "loss": 17.8473, "step": 25309 }, { "epoch": 0.46264646206152776, "grad_norm": 5.8883079948583665, "learning_rate": 5.843994281016161e-06, "loss": 17.3688, "step": 25310 }, { "epoch": 0.4626647412579743, "grad_norm": 6.139783268220673, "learning_rate": 5.843702515611136e-06, "loss": 17.429, "step": 25311 }, { "epoch": 0.46268302045442083, "grad_norm": 6.756691423272204, "learning_rate": 5.843410747249004e-06, "loss": 17.6165, "step": 25312 }, { "epoch": 0.46270129965086737, "grad_norm": 6.564538204662522, "learning_rate": 5.843118975930792e-06, "loss": 17.2877, "step": 25313 }, { "epoch": 0.46271957884731385, "grad_norm": 5.636229527122742, "learning_rate": 5.8428272016575196e-06, "loss": 17.2892, "step": 25314 }, { "epoch": 0.4627378580437604, "grad_norm": 6.562940087437146, "learning_rate": 5.8425354244302116e-06, "loss": 17.2753, "step": 25315 }, { "epoch": 0.4627561372402069, "grad_norm": 5.201640249630994, "learning_rate": 5.84224364424989e-06, "loss": 17.1833, "step": 25316 }, { "epoch": 0.46277441643665346, "grad_norm": 6.451804603219493, "learning_rate": 5.841951861117578e-06, "loss": 17.3407, "step": 25317 }, { "epoch": 0.4627926956331, "grad_norm": 6.161920593051882, "learning_rate": 5.8416600750342985e-06, "loss": 17.5662, "step": 25318 }, { "epoch": 0.4628109748295465, "grad_norm": 6.2909873660523905, "learning_rate": 5.8413682860010715e-06, "loss": 17.4847, "step": 25319 }, { "epoch": 0.462829254025993, "grad_norm": 6.678807603183959, "learning_rate": 5.841076494018922e-06, "loss": 17.5152, "step": 25320 }, { "epoch": 0.46284753322243954, "grad_norm": 6.201930651209116, "learning_rate": 5.840784699088873e-06, "loss": 17.5612, "step": 25321 }, { "epoch": 0.4628658124188861, "grad_norm": 6.736141269907265, "learning_rate": 5.840492901211949e-06, "loss": 17.7945, "step": 25322 }, { "epoch": 0.4628840916153326, "grad_norm": 6.219596559882062, "learning_rate": 5.8402011003891665e-06, "loss": 17.289, "step": 25323 }, { "epoch": 0.4629023708117791, "grad_norm": 6.894170696110101, "learning_rate": 5.839909296621553e-06, "loss": 17.5829, "step": 25324 }, { "epoch": 0.46292065000822563, "grad_norm": 6.866118240618525, "learning_rate": 5.839617489910132e-06, "loss": 17.7614, "step": 25325 }, { "epoch": 0.46293892920467217, "grad_norm": 7.584770837758176, "learning_rate": 5.839325680255923e-06, "loss": 18.1404, "step": 25326 }, { "epoch": 0.4629572084011187, "grad_norm": 6.497741617928724, "learning_rate": 5.839033867659951e-06, "loss": 17.4433, "step": 25327 }, { "epoch": 0.46297548759756524, "grad_norm": 6.794961528294284, "learning_rate": 5.8387420521232375e-06, "loss": 17.5179, "step": 25328 }, { "epoch": 0.4629937667940117, "grad_norm": 6.43904306414136, "learning_rate": 5.8384502336468065e-06, "loss": 17.4582, "step": 25329 }, { "epoch": 0.46301204599045825, "grad_norm": 9.384331646384522, "learning_rate": 5.838158412231679e-06, "loss": 18.4846, "step": 25330 }, { "epoch": 0.4630303251869048, "grad_norm": 7.290277001149687, "learning_rate": 5.83786658787888e-06, "loss": 17.895, "step": 25331 }, { "epoch": 0.4630486043833513, "grad_norm": 7.414494454667557, "learning_rate": 5.8375747605894305e-06, "loss": 17.7033, "step": 25332 }, { "epoch": 0.4630668835797978, "grad_norm": 5.793884845650024, "learning_rate": 5.837282930364355e-06, "loss": 17.3952, "step": 25333 }, { "epoch": 0.46308516277624434, "grad_norm": 7.216531494080016, "learning_rate": 5.836991097204676e-06, "loss": 18.011, "step": 25334 }, { "epoch": 0.4631034419726909, "grad_norm": 6.921636301413677, "learning_rate": 5.836699261111416e-06, "loss": 17.6392, "step": 25335 }, { "epoch": 0.4631217211691374, "grad_norm": 6.148023916485857, "learning_rate": 5.836407422085597e-06, "loss": 17.5761, "step": 25336 }, { "epoch": 0.46314000036558395, "grad_norm": 6.454094203469489, "learning_rate": 5.836115580128241e-06, "loss": 17.5949, "step": 25337 }, { "epoch": 0.46315827956203043, "grad_norm": 7.478081110918355, "learning_rate": 5.835823735240374e-06, "loss": 17.8644, "step": 25338 }, { "epoch": 0.46317655875847696, "grad_norm": 6.6794437943755565, "learning_rate": 5.835531887423018e-06, "loss": 17.6262, "step": 25339 }, { "epoch": 0.4631948379549235, "grad_norm": 6.636371695461813, "learning_rate": 5.835240036677195e-06, "loss": 17.4355, "step": 25340 }, { "epoch": 0.46321311715137004, "grad_norm": 6.4468585853660105, "learning_rate": 5.834948183003927e-06, "loss": 17.5162, "step": 25341 }, { "epoch": 0.46323139634781657, "grad_norm": 6.415564959924538, "learning_rate": 5.8346563264042376e-06, "loss": 17.4264, "step": 25342 }, { "epoch": 0.46324967554426305, "grad_norm": 6.0162155921609255, "learning_rate": 5.8343644668791525e-06, "loss": 17.5026, "step": 25343 }, { "epoch": 0.4632679547407096, "grad_norm": 5.887120949260308, "learning_rate": 5.83407260442969e-06, "loss": 17.2072, "step": 25344 }, { "epoch": 0.4632862339371561, "grad_norm": 7.261838044243021, "learning_rate": 5.833780739056877e-06, "loss": 17.6527, "step": 25345 }, { "epoch": 0.46330451313360266, "grad_norm": 8.076103650438975, "learning_rate": 5.833488870761734e-06, "loss": 18.1868, "step": 25346 }, { "epoch": 0.4633227923300492, "grad_norm": 6.297252061462196, "learning_rate": 5.833196999545285e-06, "loss": 17.7289, "step": 25347 }, { "epoch": 0.4633410715264957, "grad_norm": 7.313819585494033, "learning_rate": 5.832905125408553e-06, "loss": 18.4109, "step": 25348 }, { "epoch": 0.4633593507229422, "grad_norm": 7.601253779940887, "learning_rate": 5.832613248352562e-06, "loss": 17.9015, "step": 25349 }, { "epoch": 0.46337762991938874, "grad_norm": 6.179775210548726, "learning_rate": 5.832321368378333e-06, "loss": 17.3932, "step": 25350 }, { "epoch": 0.4633959091158353, "grad_norm": 6.334473440110927, "learning_rate": 5.832029485486888e-06, "loss": 17.4904, "step": 25351 }, { "epoch": 0.4634141883122818, "grad_norm": 6.247934125522594, "learning_rate": 5.831737599679254e-06, "loss": 17.3286, "step": 25352 }, { "epoch": 0.4634324675087283, "grad_norm": 7.355119546749972, "learning_rate": 5.831445710956452e-06, "loss": 18.326, "step": 25353 }, { "epoch": 0.46345074670517483, "grad_norm": 6.091327128978012, "learning_rate": 5.831153819319504e-06, "loss": 17.6493, "step": 25354 }, { "epoch": 0.46346902590162137, "grad_norm": 6.971561419558052, "learning_rate": 5.830861924769433e-06, "loss": 17.8274, "step": 25355 }, { "epoch": 0.4634873050980679, "grad_norm": 5.619793787453107, "learning_rate": 5.830570027307265e-06, "loss": 17.236, "step": 25356 }, { "epoch": 0.46350558429451444, "grad_norm": 5.792039180591749, "learning_rate": 5.83027812693402e-06, "loss": 17.3111, "step": 25357 }, { "epoch": 0.4635238634909609, "grad_norm": 7.829832211260318, "learning_rate": 5.829986223650722e-06, "loss": 18.3417, "step": 25358 }, { "epoch": 0.46354214268740745, "grad_norm": 6.0999711054193675, "learning_rate": 5.8296943174583955e-06, "loss": 17.3464, "step": 25359 }, { "epoch": 0.463560421883854, "grad_norm": 6.605675552514442, "learning_rate": 5.829402408358061e-06, "loss": 17.3732, "step": 25360 }, { "epoch": 0.4635787010803005, "grad_norm": 5.863169034171898, "learning_rate": 5.829110496350744e-06, "loss": 17.1284, "step": 25361 }, { "epoch": 0.46359698027674706, "grad_norm": 7.091978667450804, "learning_rate": 5.828818581437467e-06, "loss": 17.7498, "step": 25362 }, { "epoch": 0.46361525947319354, "grad_norm": 6.241537816369766, "learning_rate": 5.828526663619253e-06, "loss": 17.4417, "step": 25363 }, { "epoch": 0.4636335386696401, "grad_norm": 7.3549456243561, "learning_rate": 5.8282347428971235e-06, "loss": 17.5559, "step": 25364 }, { "epoch": 0.4636518178660866, "grad_norm": 5.578223423588588, "learning_rate": 5.8279428192721035e-06, "loss": 16.9686, "step": 25365 }, { "epoch": 0.46367009706253315, "grad_norm": 7.081109685546922, "learning_rate": 5.8276508927452165e-06, "loss": 17.5608, "step": 25366 }, { "epoch": 0.46368837625897963, "grad_norm": 5.8333112989986144, "learning_rate": 5.827358963317485e-06, "loss": 17.3277, "step": 25367 }, { "epoch": 0.46370665545542616, "grad_norm": 6.273552577904575, "learning_rate": 5.827067030989931e-06, "loss": 17.3467, "step": 25368 }, { "epoch": 0.4637249346518727, "grad_norm": 5.6332370903147675, "learning_rate": 5.826775095763578e-06, "loss": 17.3149, "step": 25369 }, { "epoch": 0.46374321384831924, "grad_norm": 5.125673178882156, "learning_rate": 5.826483157639453e-06, "loss": 16.8984, "step": 25370 }, { "epoch": 0.46376149304476577, "grad_norm": 5.542666792038269, "learning_rate": 5.826191216618574e-06, "loss": 17.3516, "step": 25371 }, { "epoch": 0.46377977224121225, "grad_norm": 5.973093200492799, "learning_rate": 5.825899272701968e-06, "loss": 17.3629, "step": 25372 }, { "epoch": 0.4637980514376588, "grad_norm": 7.58868573698555, "learning_rate": 5.825607325890655e-06, "loss": 17.8242, "step": 25373 }, { "epoch": 0.4638163306341053, "grad_norm": 6.26002998800929, "learning_rate": 5.825315376185662e-06, "loss": 17.1687, "step": 25374 }, { "epoch": 0.46383460983055186, "grad_norm": 5.446282005113881, "learning_rate": 5.825023423588009e-06, "loss": 17.0268, "step": 25375 }, { "epoch": 0.4638528890269984, "grad_norm": 6.700845822706817, "learning_rate": 5.82473146809872e-06, "loss": 17.542, "step": 25376 }, { "epoch": 0.4638711682234449, "grad_norm": 5.392900742683065, "learning_rate": 5.82443950971882e-06, "loss": 16.9712, "step": 25377 }, { "epoch": 0.4638894474198914, "grad_norm": 8.590892151220999, "learning_rate": 5.824147548449329e-06, "loss": 18.2489, "step": 25378 }, { "epoch": 0.46390772661633795, "grad_norm": 6.090399025769546, "learning_rate": 5.823855584291274e-06, "loss": 17.465, "step": 25379 }, { "epoch": 0.4639260058127845, "grad_norm": 7.01374334481513, "learning_rate": 5.823563617245678e-06, "loss": 17.891, "step": 25380 }, { "epoch": 0.463944285009231, "grad_norm": 8.63108118246894, "learning_rate": 5.8232716473135605e-06, "loss": 17.4832, "step": 25381 }, { "epoch": 0.4639625642056775, "grad_norm": 6.861777262285128, "learning_rate": 5.8229796744959485e-06, "loss": 17.8547, "step": 25382 }, { "epoch": 0.46398084340212403, "grad_norm": 6.175446626925928, "learning_rate": 5.822687698793863e-06, "loss": 17.1716, "step": 25383 }, { "epoch": 0.46399912259857057, "grad_norm": 4.995754632381177, "learning_rate": 5.82239572020833e-06, "loss": 16.9765, "step": 25384 }, { "epoch": 0.4640174017950171, "grad_norm": 6.297137853896752, "learning_rate": 5.8221037387403715e-06, "loss": 17.3626, "step": 25385 }, { "epoch": 0.46403568099146364, "grad_norm": 5.997894405928749, "learning_rate": 5.82181175439101e-06, "loss": 17.3152, "step": 25386 }, { "epoch": 0.4640539601879101, "grad_norm": 9.412705800128995, "learning_rate": 5.821519767161269e-06, "loss": 17.9353, "step": 25387 }, { "epoch": 0.46407223938435666, "grad_norm": 5.846131578774165, "learning_rate": 5.821227777052173e-06, "loss": 17.7164, "step": 25388 }, { "epoch": 0.4640905185808032, "grad_norm": 6.777301688189443, "learning_rate": 5.820935784064745e-06, "loss": 17.3869, "step": 25389 }, { "epoch": 0.4641087977772497, "grad_norm": 7.3930174262124675, "learning_rate": 5.820643788200009e-06, "loss": 17.6377, "step": 25390 }, { "epoch": 0.46412707697369626, "grad_norm": 6.179387443757268, "learning_rate": 5.8203517894589865e-06, "loss": 17.3686, "step": 25391 }, { "epoch": 0.46414535617014274, "grad_norm": 6.3682285265058525, "learning_rate": 5.820059787842702e-06, "loss": 17.4647, "step": 25392 }, { "epoch": 0.4641636353665893, "grad_norm": 6.687257072835672, "learning_rate": 5.8197677833521805e-06, "loss": 17.3832, "step": 25393 }, { "epoch": 0.4641819145630358, "grad_norm": 8.697887815710882, "learning_rate": 5.819475775988445e-06, "loss": 18.3869, "step": 25394 }, { "epoch": 0.46420019375948235, "grad_norm": 6.373782285258622, "learning_rate": 5.819183765752516e-06, "loss": 17.4581, "step": 25395 }, { "epoch": 0.4642184729559289, "grad_norm": 5.93386617840029, "learning_rate": 5.818891752645418e-06, "loss": 17.1143, "step": 25396 }, { "epoch": 0.46423675215237536, "grad_norm": 5.622582804489323, "learning_rate": 5.818599736668178e-06, "loss": 17.1578, "step": 25397 }, { "epoch": 0.4642550313488219, "grad_norm": 5.715015822660491, "learning_rate": 5.8183077178218166e-06, "loss": 17.1295, "step": 25398 }, { "epoch": 0.46427331054526844, "grad_norm": 6.563659242416588, "learning_rate": 5.8180156961073566e-06, "loss": 17.4553, "step": 25399 }, { "epoch": 0.46429158974171497, "grad_norm": 5.851177216489818, "learning_rate": 5.817723671525822e-06, "loss": 17.2711, "step": 25400 }, { "epoch": 0.46430986893816145, "grad_norm": 7.7478468610353, "learning_rate": 5.8174316440782375e-06, "loss": 17.9985, "step": 25401 }, { "epoch": 0.464328148134608, "grad_norm": 7.663742962480571, "learning_rate": 5.8171396137656265e-06, "loss": 18.1791, "step": 25402 }, { "epoch": 0.4643464273310545, "grad_norm": 5.864716069555697, "learning_rate": 5.8168475805890125e-06, "loss": 17.3009, "step": 25403 }, { "epoch": 0.46436470652750106, "grad_norm": 7.865864842195836, "learning_rate": 5.816555544549418e-06, "loss": 18.2962, "step": 25404 }, { "epoch": 0.4643829857239476, "grad_norm": 5.933238752604491, "learning_rate": 5.8162635056478665e-06, "loss": 17.2437, "step": 25405 }, { "epoch": 0.4644012649203941, "grad_norm": 6.257970487848118, "learning_rate": 5.815971463885383e-06, "loss": 17.3387, "step": 25406 }, { "epoch": 0.4644195441168406, "grad_norm": 5.873520787476609, "learning_rate": 5.815679419262989e-06, "loss": 17.2506, "step": 25407 }, { "epoch": 0.46443782331328715, "grad_norm": 5.236429270078546, "learning_rate": 5.815387371781713e-06, "loss": 16.9959, "step": 25408 }, { "epoch": 0.4644561025097337, "grad_norm": 6.122325410689532, "learning_rate": 5.815095321442572e-06, "loss": 17.189, "step": 25409 }, { "epoch": 0.4644743817061802, "grad_norm": 6.761716591140318, "learning_rate": 5.814803268246593e-06, "loss": 18.0985, "step": 25410 }, { "epoch": 0.4644926609026267, "grad_norm": 6.242224172683804, "learning_rate": 5.8145112121948e-06, "loss": 17.6028, "step": 25411 }, { "epoch": 0.46451094009907323, "grad_norm": 7.912110355549454, "learning_rate": 5.814219153288215e-06, "loss": 17.3915, "step": 25412 }, { "epoch": 0.46452921929551977, "grad_norm": 7.727862681621994, "learning_rate": 5.813927091527864e-06, "loss": 17.9699, "step": 25413 }, { "epoch": 0.4645474984919663, "grad_norm": 6.419234879825803, "learning_rate": 5.813635026914767e-06, "loss": 17.6021, "step": 25414 }, { "epoch": 0.46456577768841284, "grad_norm": 6.38559028325601, "learning_rate": 5.813342959449951e-06, "loss": 17.4399, "step": 25415 }, { "epoch": 0.4645840568848593, "grad_norm": 6.663499737680368, "learning_rate": 5.813050889134438e-06, "loss": 17.4291, "step": 25416 }, { "epoch": 0.46460233608130586, "grad_norm": 7.055634140752238, "learning_rate": 5.812758815969253e-06, "loss": 17.7337, "step": 25417 }, { "epoch": 0.4646206152777524, "grad_norm": 5.865997723408139, "learning_rate": 5.812466739955418e-06, "loss": 17.0736, "step": 25418 }, { "epoch": 0.4646388944741989, "grad_norm": 7.534559974340752, "learning_rate": 5.8121746610939575e-06, "loss": 17.673, "step": 25419 }, { "epoch": 0.46465717367064546, "grad_norm": 7.6623166898443555, "learning_rate": 5.811882579385897e-06, "loss": 18.3083, "step": 25420 }, { "epoch": 0.46467545286709194, "grad_norm": 7.165215723988402, "learning_rate": 5.8115904948322565e-06, "loss": 17.5709, "step": 25421 }, { "epoch": 0.4646937320635385, "grad_norm": 8.324835671437848, "learning_rate": 5.811298407434064e-06, "loss": 18.0978, "step": 25422 }, { "epoch": 0.464712011259985, "grad_norm": 7.465843154080268, "learning_rate": 5.811006317192338e-06, "loss": 18.0381, "step": 25423 }, { "epoch": 0.46473029045643155, "grad_norm": 6.404398169454394, "learning_rate": 5.810714224108107e-06, "loss": 17.35, "step": 25424 }, { "epoch": 0.4647485696528781, "grad_norm": 5.558068167350239, "learning_rate": 5.810422128182393e-06, "loss": 17.0357, "step": 25425 }, { "epoch": 0.46476684884932457, "grad_norm": 7.94388708563255, "learning_rate": 5.810130029416221e-06, "loss": 18.3155, "step": 25426 }, { "epoch": 0.4647851280457711, "grad_norm": 10.070031384060563, "learning_rate": 5.809837927810612e-06, "loss": 18.129, "step": 25427 }, { "epoch": 0.46480340724221764, "grad_norm": 6.813759307518242, "learning_rate": 5.8095458233665915e-06, "loss": 17.3762, "step": 25428 }, { "epoch": 0.46482168643866417, "grad_norm": 8.24614842268811, "learning_rate": 5.8092537160851825e-06, "loss": 18.2599, "step": 25429 }, { "epoch": 0.4648399656351107, "grad_norm": 7.886666816792465, "learning_rate": 5.80896160596741e-06, "loss": 17.9468, "step": 25430 }, { "epoch": 0.4648582448315572, "grad_norm": 6.734079013518263, "learning_rate": 5.808669493014297e-06, "loss": 17.51, "step": 25431 }, { "epoch": 0.4648765240280037, "grad_norm": 5.77974965198436, "learning_rate": 5.808377377226868e-06, "loss": 17.169, "step": 25432 }, { "epoch": 0.46489480322445026, "grad_norm": 7.621575056975452, "learning_rate": 5.808085258606146e-06, "loss": 17.8084, "step": 25433 }, { "epoch": 0.4649130824208968, "grad_norm": 5.757923524112118, "learning_rate": 5.807793137153156e-06, "loss": 17.0912, "step": 25434 }, { "epoch": 0.4649313616173433, "grad_norm": 5.969163553293595, "learning_rate": 5.8075010128689226e-06, "loss": 17.1662, "step": 25435 }, { "epoch": 0.4649496408137898, "grad_norm": 7.14085071453925, "learning_rate": 5.807208885754466e-06, "loss": 17.5669, "step": 25436 }, { "epoch": 0.46496792001023635, "grad_norm": 6.7941607972969935, "learning_rate": 5.806916755810812e-06, "loss": 17.5949, "step": 25437 }, { "epoch": 0.4649861992066829, "grad_norm": 7.7719566649871785, "learning_rate": 5.806624623038985e-06, "loss": 17.5299, "step": 25438 }, { "epoch": 0.4650044784031294, "grad_norm": 6.084064617451337, "learning_rate": 5.80633248744001e-06, "loss": 17.7187, "step": 25439 }, { "epoch": 0.4650227575995759, "grad_norm": 6.3385224137895415, "learning_rate": 5.806040349014908e-06, "loss": 17.3292, "step": 25440 }, { "epoch": 0.46504103679602243, "grad_norm": 5.967159038319389, "learning_rate": 5.805748207764707e-06, "loss": 17.1518, "step": 25441 }, { "epoch": 0.46505931599246897, "grad_norm": 7.480021498899485, "learning_rate": 5.805456063690426e-06, "loss": 17.6674, "step": 25442 }, { "epoch": 0.4650775951889155, "grad_norm": 5.934298317671547, "learning_rate": 5.805163916793092e-06, "loss": 17.2422, "step": 25443 }, { "epoch": 0.46509587438536204, "grad_norm": 5.419061835388019, "learning_rate": 5.804871767073729e-06, "loss": 17.2889, "step": 25444 }, { "epoch": 0.4651141535818085, "grad_norm": 6.091419538500869, "learning_rate": 5.804579614533359e-06, "loss": 17.4997, "step": 25445 }, { "epoch": 0.46513243277825506, "grad_norm": 5.920096238098323, "learning_rate": 5.804287459173008e-06, "loss": 17.3415, "step": 25446 }, { "epoch": 0.4651507119747016, "grad_norm": 8.725667131244787, "learning_rate": 5.8039953009937e-06, "loss": 17.7631, "step": 25447 }, { "epoch": 0.4651689911711481, "grad_norm": 6.398170107632234, "learning_rate": 5.803703139996457e-06, "loss": 17.2332, "step": 25448 }, { "epoch": 0.46518727036759466, "grad_norm": 6.594791005756539, "learning_rate": 5.803410976182306e-06, "loss": 17.3875, "step": 25449 }, { "epoch": 0.46520554956404114, "grad_norm": 5.356000716930894, "learning_rate": 5.803118809552268e-06, "loss": 17.138, "step": 25450 }, { "epoch": 0.4652238287604877, "grad_norm": 6.044378429430535, "learning_rate": 5.802826640107367e-06, "loss": 17.1697, "step": 25451 }, { "epoch": 0.4652421079569342, "grad_norm": 7.951520853661495, "learning_rate": 5.802534467848629e-06, "loss": 18.0788, "step": 25452 }, { "epoch": 0.46526038715338075, "grad_norm": 6.48639673924519, "learning_rate": 5.80224229277708e-06, "loss": 17.3391, "step": 25453 }, { "epoch": 0.4652786663498273, "grad_norm": 5.80189892277259, "learning_rate": 5.80195011489374e-06, "loss": 17.255, "step": 25454 }, { "epoch": 0.46529694554627377, "grad_norm": 6.871948405060312, "learning_rate": 5.801657934199633e-06, "loss": 17.7307, "step": 25455 }, { "epoch": 0.4653152247427203, "grad_norm": 6.067583718378898, "learning_rate": 5.801365750695786e-06, "loss": 17.0874, "step": 25456 }, { "epoch": 0.46533350393916684, "grad_norm": 6.5609355757583945, "learning_rate": 5.801073564383219e-06, "loss": 17.272, "step": 25457 }, { "epoch": 0.4653517831356134, "grad_norm": 6.271905588458909, "learning_rate": 5.800781375262962e-06, "loss": 17.386, "step": 25458 }, { "epoch": 0.4653700623320599, "grad_norm": 7.648823117763026, "learning_rate": 5.800489183336033e-06, "loss": 17.7272, "step": 25459 }, { "epoch": 0.4653883415285064, "grad_norm": 8.224700023842017, "learning_rate": 5.800196988603461e-06, "loss": 17.8575, "step": 25460 }, { "epoch": 0.4654066207249529, "grad_norm": 7.975275975421861, "learning_rate": 5.799904791066266e-06, "loss": 17.7064, "step": 25461 }, { "epoch": 0.46542489992139946, "grad_norm": 7.692745960079071, "learning_rate": 5.799612590725477e-06, "loss": 18.0154, "step": 25462 }, { "epoch": 0.465443179117846, "grad_norm": 6.198587248863337, "learning_rate": 5.799320387582113e-06, "loss": 17.4678, "step": 25463 }, { "epoch": 0.46546145831429253, "grad_norm": 5.770454018950131, "learning_rate": 5.7990281816372e-06, "loss": 17.1268, "step": 25464 }, { "epoch": 0.465479737510739, "grad_norm": 8.128152895873397, "learning_rate": 5.798735972891764e-06, "loss": 18.4631, "step": 25465 }, { "epoch": 0.46549801670718555, "grad_norm": 4.776212786610156, "learning_rate": 5.798443761346828e-06, "loss": 16.8338, "step": 25466 }, { "epoch": 0.4655162959036321, "grad_norm": 8.268703185539234, "learning_rate": 5.798151547003416e-06, "loss": 17.7133, "step": 25467 }, { "epoch": 0.4655345751000786, "grad_norm": 6.109843705524758, "learning_rate": 5.797859329862551e-06, "loss": 17.4378, "step": 25468 }, { "epoch": 0.4655528542965251, "grad_norm": 6.166562862592454, "learning_rate": 5.7975671099252575e-06, "loss": 17.7501, "step": 25469 }, { "epoch": 0.46557113349297163, "grad_norm": 5.4298879723126285, "learning_rate": 5.797274887192562e-06, "loss": 17.3158, "step": 25470 }, { "epoch": 0.46558941268941817, "grad_norm": 6.311308844092306, "learning_rate": 5.796982661665487e-06, "loss": 17.1825, "step": 25471 }, { "epoch": 0.4656076918858647, "grad_norm": 6.356383502204275, "learning_rate": 5.796690433345056e-06, "loss": 17.1621, "step": 25472 }, { "epoch": 0.46562597108231124, "grad_norm": 6.694012136081018, "learning_rate": 5.796398202232295e-06, "loss": 17.6111, "step": 25473 }, { "epoch": 0.4656442502787577, "grad_norm": 5.5338284597810485, "learning_rate": 5.796105968328227e-06, "loss": 17.1924, "step": 25474 }, { "epoch": 0.46566252947520426, "grad_norm": 6.168919143212222, "learning_rate": 5.795813731633877e-06, "loss": 17.3206, "step": 25475 }, { "epoch": 0.4656808086716508, "grad_norm": 5.722622995627302, "learning_rate": 5.795521492150269e-06, "loss": 17.2056, "step": 25476 }, { "epoch": 0.46569908786809733, "grad_norm": 5.637556516954348, "learning_rate": 5.795229249878427e-06, "loss": 17.1989, "step": 25477 }, { "epoch": 0.46571736706454386, "grad_norm": 6.9322596818191276, "learning_rate": 5.794937004819374e-06, "loss": 17.6157, "step": 25478 }, { "epoch": 0.46573564626099034, "grad_norm": 6.298532134776038, "learning_rate": 5.794644756974138e-06, "loss": 17.4987, "step": 25479 }, { "epoch": 0.4657539254574369, "grad_norm": 5.948370314153637, "learning_rate": 5.7943525063437415e-06, "loss": 17.3412, "step": 25480 }, { "epoch": 0.4657722046538834, "grad_norm": 6.1791203787472515, "learning_rate": 5.7940602529292065e-06, "loss": 17.5173, "step": 25481 }, { "epoch": 0.46579048385032995, "grad_norm": 7.65046005754977, "learning_rate": 5.793767996731561e-06, "loss": 17.9836, "step": 25482 }, { "epoch": 0.4658087630467765, "grad_norm": 6.968752098824836, "learning_rate": 5.793475737751825e-06, "loss": 17.7423, "step": 25483 }, { "epoch": 0.46582704224322297, "grad_norm": 5.743834748829708, "learning_rate": 5.793183475991028e-06, "loss": 17.2186, "step": 25484 }, { "epoch": 0.4658453214396695, "grad_norm": 6.34696715960195, "learning_rate": 5.79289121145019e-06, "loss": 17.4219, "step": 25485 }, { "epoch": 0.46586360063611604, "grad_norm": 11.808143119530456, "learning_rate": 5.792598944130338e-06, "loss": 17.6976, "step": 25486 }, { "epoch": 0.4658818798325626, "grad_norm": 8.46839004191002, "learning_rate": 5.7923066740324954e-06, "loss": 18.5328, "step": 25487 }, { "epoch": 0.4659001590290091, "grad_norm": 7.399257564711092, "learning_rate": 5.792014401157686e-06, "loss": 17.7309, "step": 25488 }, { "epoch": 0.4659184382254556, "grad_norm": 7.342365479060027, "learning_rate": 5.791722125506935e-06, "loss": 17.8551, "step": 25489 }, { "epoch": 0.4659367174219021, "grad_norm": 6.414269926068148, "learning_rate": 5.791429847081268e-06, "loss": 17.376, "step": 25490 }, { "epoch": 0.46595499661834866, "grad_norm": 6.692070420332744, "learning_rate": 5.791137565881706e-06, "loss": 17.1239, "step": 25491 }, { "epoch": 0.4659732758147952, "grad_norm": 5.941832870729837, "learning_rate": 5.790845281909278e-06, "loss": 17.342, "step": 25492 }, { "epoch": 0.46599155501124173, "grad_norm": 7.054093746543174, "learning_rate": 5.790552995165003e-06, "loss": 17.82, "step": 25493 }, { "epoch": 0.4660098342076882, "grad_norm": 7.1926783141619035, "learning_rate": 5.790260705649912e-06, "loss": 17.8253, "step": 25494 }, { "epoch": 0.46602811340413475, "grad_norm": 7.290353227204587, "learning_rate": 5.789968413365022e-06, "loss": 17.8883, "step": 25495 }, { "epoch": 0.4660463926005813, "grad_norm": 8.000022510651304, "learning_rate": 5.789676118311362e-06, "loss": 17.6913, "step": 25496 }, { "epoch": 0.4660646717970278, "grad_norm": 5.7223660166521855, "learning_rate": 5.789383820489958e-06, "loss": 17.2571, "step": 25497 }, { "epoch": 0.46608295099347435, "grad_norm": 6.014089842940683, "learning_rate": 5.7890915199018305e-06, "loss": 17.3348, "step": 25498 }, { "epoch": 0.46610123018992083, "grad_norm": 7.728944553249146, "learning_rate": 5.788799216548007e-06, "loss": 18.1525, "step": 25499 }, { "epoch": 0.46611950938636737, "grad_norm": 6.740202405766377, "learning_rate": 5.788506910429509e-06, "loss": 18.0152, "step": 25500 }, { "epoch": 0.4661377885828139, "grad_norm": 5.878783018698597, "learning_rate": 5.7882146015473635e-06, "loss": 17.3935, "step": 25501 }, { "epoch": 0.46615606777926044, "grad_norm": 5.514175149194307, "learning_rate": 5.787922289902594e-06, "loss": 17.1685, "step": 25502 }, { "epoch": 0.4661743469757069, "grad_norm": 5.575428012857195, "learning_rate": 5.787629975496225e-06, "loss": 17.3216, "step": 25503 }, { "epoch": 0.46619262617215346, "grad_norm": 6.837583545419483, "learning_rate": 5.787337658329283e-06, "loss": 17.6341, "step": 25504 }, { "epoch": 0.4662109053686, "grad_norm": 6.86522844280634, "learning_rate": 5.787045338402788e-06, "loss": 17.6928, "step": 25505 }, { "epoch": 0.46622918456504653, "grad_norm": 6.6057154665080375, "learning_rate": 5.7867530157177695e-06, "loss": 17.4884, "step": 25506 }, { "epoch": 0.46624746376149306, "grad_norm": 6.234282463089636, "learning_rate": 5.786460690275248e-06, "loss": 17.4245, "step": 25507 }, { "epoch": 0.46626574295793954, "grad_norm": 5.2487149707511795, "learning_rate": 5.786168362076253e-06, "loss": 17.1315, "step": 25508 }, { "epoch": 0.4662840221543861, "grad_norm": 6.327196159722178, "learning_rate": 5.785876031121804e-06, "loss": 17.3609, "step": 25509 }, { "epoch": 0.4663023013508326, "grad_norm": 4.98513399112824, "learning_rate": 5.7855836974129275e-06, "loss": 16.8159, "step": 25510 }, { "epoch": 0.46632058054727915, "grad_norm": 7.033274968593185, "learning_rate": 5.78529136095065e-06, "loss": 17.8629, "step": 25511 }, { "epoch": 0.4663388597437257, "grad_norm": 6.4682641751182395, "learning_rate": 5.784999021735994e-06, "loss": 17.729, "step": 25512 }, { "epoch": 0.46635713894017217, "grad_norm": 6.061961259786219, "learning_rate": 5.7847066797699835e-06, "loss": 17.3876, "step": 25513 }, { "epoch": 0.4663754181366187, "grad_norm": 7.0861948553921135, "learning_rate": 5.784414335053645e-06, "loss": 17.6794, "step": 25514 }, { "epoch": 0.46639369733306524, "grad_norm": 5.939268965139865, "learning_rate": 5.7841219875880014e-06, "loss": 17.4165, "step": 25515 }, { "epoch": 0.4664119765295118, "grad_norm": 7.868548777318356, "learning_rate": 5.783829637374079e-06, "loss": 17.8753, "step": 25516 }, { "epoch": 0.4664302557259583, "grad_norm": 5.904137056078681, "learning_rate": 5.783537284412901e-06, "loss": 17.0714, "step": 25517 }, { "epoch": 0.4664485349224048, "grad_norm": 8.465949297581712, "learning_rate": 5.783244928705494e-06, "loss": 18.3392, "step": 25518 }, { "epoch": 0.4664668141188513, "grad_norm": 5.678356745077976, "learning_rate": 5.782952570252881e-06, "loss": 17.1089, "step": 25519 }, { "epoch": 0.46648509331529786, "grad_norm": 6.723494075443567, "learning_rate": 5.782660209056087e-06, "loss": 17.9239, "step": 25520 }, { "epoch": 0.4665033725117444, "grad_norm": 6.949571521758994, "learning_rate": 5.782367845116137e-06, "loss": 17.7583, "step": 25521 }, { "epoch": 0.46652165170819093, "grad_norm": 7.673193173161843, "learning_rate": 5.782075478434056e-06, "loss": 18.2279, "step": 25522 }, { "epoch": 0.4665399309046374, "grad_norm": 7.222699785340751, "learning_rate": 5.7817831090108665e-06, "loss": 17.7451, "step": 25523 }, { "epoch": 0.46655821010108395, "grad_norm": 8.3256639385663, "learning_rate": 5.781490736847597e-06, "loss": 18.2498, "step": 25524 }, { "epoch": 0.4665764892975305, "grad_norm": 6.864595074317109, "learning_rate": 5.78119836194527e-06, "loss": 17.7798, "step": 25525 }, { "epoch": 0.466594768493977, "grad_norm": 7.300946696061787, "learning_rate": 5.780905984304911e-06, "loss": 17.8623, "step": 25526 }, { "epoch": 0.46661304769042355, "grad_norm": 7.140533085292159, "learning_rate": 5.780613603927543e-06, "loss": 17.791, "step": 25527 }, { "epoch": 0.46663132688687003, "grad_norm": 6.553813815456046, "learning_rate": 5.7803212208141925e-06, "loss": 17.4941, "step": 25528 }, { "epoch": 0.46664960608331657, "grad_norm": 6.372771379056917, "learning_rate": 5.780028834965884e-06, "loss": 17.5054, "step": 25529 }, { "epoch": 0.4666678852797631, "grad_norm": 6.650607337775036, "learning_rate": 5.779736446383642e-06, "loss": 17.7421, "step": 25530 }, { "epoch": 0.46668616447620964, "grad_norm": 5.45317082491413, "learning_rate": 5.7794440550684914e-06, "loss": 17.1432, "step": 25531 }, { "epoch": 0.4667044436726562, "grad_norm": 6.692476524637104, "learning_rate": 5.779151661021457e-06, "loss": 17.5515, "step": 25532 }, { "epoch": 0.46672272286910266, "grad_norm": 6.150310827331175, "learning_rate": 5.778859264243564e-06, "loss": 17.3746, "step": 25533 }, { "epoch": 0.4667410020655492, "grad_norm": 5.5649850611619085, "learning_rate": 5.778566864735836e-06, "loss": 17.3589, "step": 25534 }, { "epoch": 0.46675928126199573, "grad_norm": 6.7430881717107, "learning_rate": 5.778274462499301e-06, "loss": 18.0345, "step": 25535 }, { "epoch": 0.46677756045844226, "grad_norm": 6.655482925491027, "learning_rate": 5.777982057534978e-06, "loss": 17.6676, "step": 25536 }, { "epoch": 0.46679583965488874, "grad_norm": 8.495092585597954, "learning_rate": 5.777689649843897e-06, "loss": 18.0385, "step": 25537 }, { "epoch": 0.4668141188513353, "grad_norm": 7.124209805019028, "learning_rate": 5.777397239427081e-06, "loss": 17.7627, "step": 25538 }, { "epoch": 0.4668323980477818, "grad_norm": 6.998044890956389, "learning_rate": 5.7771048262855565e-06, "loss": 17.7135, "step": 25539 }, { "epoch": 0.46685067724422835, "grad_norm": 6.909892355272215, "learning_rate": 5.776812410420347e-06, "loss": 18.0957, "step": 25540 }, { "epoch": 0.4668689564406749, "grad_norm": 6.803884410587914, "learning_rate": 5.7765199918324766e-06, "loss": 17.3433, "step": 25541 }, { "epoch": 0.46688723563712137, "grad_norm": 6.327646757334009, "learning_rate": 5.77622757052297e-06, "loss": 17.2302, "step": 25542 }, { "epoch": 0.4669055148335679, "grad_norm": 6.224291355273158, "learning_rate": 5.775935146492855e-06, "loss": 17.4444, "step": 25543 }, { "epoch": 0.46692379403001444, "grad_norm": 6.495749539530074, "learning_rate": 5.775642719743153e-06, "loss": 17.742, "step": 25544 }, { "epoch": 0.466942073226461, "grad_norm": 5.932850909083503, "learning_rate": 5.7753502902748915e-06, "loss": 17.1484, "step": 25545 }, { "epoch": 0.4669603524229075, "grad_norm": 6.508198917908852, "learning_rate": 5.775057858089094e-06, "loss": 17.3845, "step": 25546 }, { "epoch": 0.466978631619354, "grad_norm": 8.117538089044235, "learning_rate": 5.774765423186786e-06, "loss": 17.6249, "step": 25547 }, { "epoch": 0.4669969108158005, "grad_norm": 6.58849137660269, "learning_rate": 5.774472985568993e-06, "loss": 17.6773, "step": 25548 }, { "epoch": 0.46701519001224706, "grad_norm": 8.029901195463289, "learning_rate": 5.7741805452367395e-06, "loss": 17.898, "step": 25549 }, { "epoch": 0.4670334692086936, "grad_norm": 5.626040265528789, "learning_rate": 5.77388810219105e-06, "loss": 17.1408, "step": 25550 }, { "epoch": 0.46705174840514013, "grad_norm": 7.145792048078933, "learning_rate": 5.773595656432949e-06, "loss": 17.8677, "step": 25551 }, { "epoch": 0.4670700276015866, "grad_norm": 6.079540984990817, "learning_rate": 5.773303207963463e-06, "loss": 17.4458, "step": 25552 }, { "epoch": 0.46708830679803315, "grad_norm": 7.3454322016886815, "learning_rate": 5.773010756783618e-06, "loss": 17.6739, "step": 25553 }, { "epoch": 0.4671065859944797, "grad_norm": 7.135559965269335, "learning_rate": 5.772718302894436e-06, "loss": 17.7609, "step": 25554 }, { "epoch": 0.4671248651909262, "grad_norm": 6.4889447700450615, "learning_rate": 5.772425846296942e-06, "loss": 17.4732, "step": 25555 }, { "epoch": 0.46714314438737276, "grad_norm": 6.333431867166509, "learning_rate": 5.772133386992164e-06, "loss": 17.6238, "step": 25556 }, { "epoch": 0.46716142358381924, "grad_norm": 6.435345424507448, "learning_rate": 5.771840924981126e-06, "loss": 17.1634, "step": 25557 }, { "epoch": 0.46717970278026577, "grad_norm": 7.149523412735398, "learning_rate": 5.771548460264851e-06, "loss": 17.5551, "step": 25558 }, { "epoch": 0.4671979819767123, "grad_norm": 6.234826849885168, "learning_rate": 5.771255992844367e-06, "loss": 17.5132, "step": 25559 }, { "epoch": 0.46721626117315884, "grad_norm": 8.309589306755823, "learning_rate": 5.770963522720696e-06, "loss": 18.2804, "step": 25560 }, { "epoch": 0.4672345403696054, "grad_norm": 6.5399304772071725, "learning_rate": 5.770671049894866e-06, "loss": 17.3866, "step": 25561 }, { "epoch": 0.46725281956605186, "grad_norm": 5.692793393220293, "learning_rate": 5.7703785743679005e-06, "loss": 17.2978, "step": 25562 }, { "epoch": 0.4672710987624984, "grad_norm": 9.360252230875442, "learning_rate": 5.770086096140826e-06, "loss": 18.3529, "step": 25563 }, { "epoch": 0.46728937795894493, "grad_norm": 7.343001489445058, "learning_rate": 5.769793615214665e-06, "loss": 18.1661, "step": 25564 }, { "epoch": 0.46730765715539146, "grad_norm": 5.8445637134500386, "learning_rate": 5.769501131590445e-06, "loss": 17.3307, "step": 25565 }, { "epoch": 0.467325936351838, "grad_norm": 5.3673205644296695, "learning_rate": 5.769208645269191e-06, "loss": 16.9959, "step": 25566 }, { "epoch": 0.4673442155482845, "grad_norm": 5.995327681747548, "learning_rate": 5.768916156251926e-06, "loss": 17.3419, "step": 25567 }, { "epoch": 0.467362494744731, "grad_norm": 5.9037602333119095, "learning_rate": 5.768623664539677e-06, "loss": 17.4071, "step": 25568 }, { "epoch": 0.46738077394117755, "grad_norm": 7.740053557973855, "learning_rate": 5.7683311701334675e-06, "loss": 18.268, "step": 25569 }, { "epoch": 0.4673990531376241, "grad_norm": 5.596008801615207, "learning_rate": 5.7680386730343265e-06, "loss": 17.4299, "step": 25570 }, { "epoch": 0.46741733233407057, "grad_norm": 6.375225596961252, "learning_rate": 5.767746173243276e-06, "loss": 17.421, "step": 25571 }, { "epoch": 0.4674356115305171, "grad_norm": 6.040953522506135, "learning_rate": 5.7674536707613404e-06, "loss": 17.4077, "step": 25572 }, { "epoch": 0.46745389072696364, "grad_norm": 5.330775525693102, "learning_rate": 5.767161165589548e-06, "loss": 17.1028, "step": 25573 }, { "epoch": 0.4674721699234102, "grad_norm": 7.585647974726364, "learning_rate": 5.766868657728921e-06, "loss": 17.7483, "step": 25574 }, { "epoch": 0.4674904491198567, "grad_norm": 6.346581416172484, "learning_rate": 5.766576147180486e-06, "loss": 17.537, "step": 25575 }, { "epoch": 0.4675087283163032, "grad_norm": 6.514677848062042, "learning_rate": 5.766283633945269e-06, "loss": 17.7364, "step": 25576 }, { "epoch": 0.4675270075127497, "grad_norm": 6.8426307403884, "learning_rate": 5.765991118024294e-06, "loss": 17.6425, "step": 25577 }, { "epoch": 0.46754528670919626, "grad_norm": 6.11212857663012, "learning_rate": 5.765698599418585e-06, "loss": 17.4694, "step": 25578 }, { "epoch": 0.4675635659056428, "grad_norm": 6.029309951634189, "learning_rate": 5.76540607812917e-06, "loss": 17.3648, "step": 25579 }, { "epoch": 0.46758184510208933, "grad_norm": 7.3512979418722075, "learning_rate": 5.7651135541570745e-06, "loss": 17.4534, "step": 25580 }, { "epoch": 0.4676001242985358, "grad_norm": 6.478351434879791, "learning_rate": 5.764821027503321e-06, "loss": 17.1912, "step": 25581 }, { "epoch": 0.46761840349498235, "grad_norm": 6.2906457901331825, "learning_rate": 5.764528498168935e-06, "loss": 17.4239, "step": 25582 }, { "epoch": 0.4676366826914289, "grad_norm": 5.0143946326574875, "learning_rate": 5.764235966154945e-06, "loss": 17.0215, "step": 25583 }, { "epoch": 0.4676549618878754, "grad_norm": 6.367645216377919, "learning_rate": 5.763943431462375e-06, "loss": 17.4521, "step": 25584 }, { "epoch": 0.46767324108432196, "grad_norm": 5.883589161899149, "learning_rate": 5.763650894092247e-06, "loss": 17.3561, "step": 25585 }, { "epoch": 0.46769152028076844, "grad_norm": 7.519895788858407, "learning_rate": 5.763358354045591e-06, "loss": 17.4102, "step": 25586 }, { "epoch": 0.46770979947721497, "grad_norm": 7.090251417467326, "learning_rate": 5.763065811323429e-06, "loss": 17.4349, "step": 25587 }, { "epoch": 0.4677280786736615, "grad_norm": 6.605999878783802, "learning_rate": 5.762773265926788e-06, "loss": 17.434, "step": 25588 }, { "epoch": 0.46774635787010804, "grad_norm": 5.274989284856472, "learning_rate": 5.762480717856692e-06, "loss": 16.9382, "step": 25589 }, { "epoch": 0.4677646370665546, "grad_norm": 6.385074581300302, "learning_rate": 5.762188167114168e-06, "loss": 17.5914, "step": 25590 }, { "epoch": 0.46778291626300106, "grad_norm": 5.097482259909812, "learning_rate": 5.76189561370024e-06, "loss": 17.1079, "step": 25591 }, { "epoch": 0.4678011954594476, "grad_norm": 6.112364499099624, "learning_rate": 5.761603057615936e-06, "loss": 17.45, "step": 25592 }, { "epoch": 0.46781947465589413, "grad_norm": 6.6746320042266545, "learning_rate": 5.761310498862277e-06, "loss": 17.5868, "step": 25593 }, { "epoch": 0.46783775385234067, "grad_norm": 6.196454607512979, "learning_rate": 5.761017937440293e-06, "loss": 17.5181, "step": 25594 }, { "epoch": 0.4678560330487872, "grad_norm": 7.1611120361721925, "learning_rate": 5.760725373351006e-06, "loss": 17.6064, "step": 25595 }, { "epoch": 0.4678743122452337, "grad_norm": 6.581741303080483, "learning_rate": 5.760432806595441e-06, "loss": 17.7773, "step": 25596 }, { "epoch": 0.4678925914416802, "grad_norm": 6.500205512880502, "learning_rate": 5.760140237174627e-06, "loss": 17.6136, "step": 25597 }, { "epoch": 0.46791087063812675, "grad_norm": 6.2234080923482935, "learning_rate": 5.759847665089587e-06, "loss": 17.37, "step": 25598 }, { "epoch": 0.4679291498345733, "grad_norm": 5.453934784457371, "learning_rate": 5.759555090341347e-06, "loss": 17.2709, "step": 25599 }, { "epoch": 0.4679474290310198, "grad_norm": 5.221289181546808, "learning_rate": 5.759262512930932e-06, "loss": 17.1167, "step": 25600 }, { "epoch": 0.4679657082274663, "grad_norm": 7.9108630108612665, "learning_rate": 5.7589699328593675e-06, "loss": 18.2024, "step": 25601 }, { "epoch": 0.46798398742391284, "grad_norm": 7.952453852758127, "learning_rate": 5.758677350127679e-06, "loss": 17.6896, "step": 25602 }, { "epoch": 0.4680022666203594, "grad_norm": 4.938859191373233, "learning_rate": 5.758384764736893e-06, "loss": 17.1023, "step": 25603 }, { "epoch": 0.4680205458168059, "grad_norm": 7.937993365598106, "learning_rate": 5.758092176688035e-06, "loss": 18.5179, "step": 25604 }, { "epoch": 0.4680388250132524, "grad_norm": 6.450681511813594, "learning_rate": 5.757799585982128e-06, "loss": 17.8099, "step": 25605 }, { "epoch": 0.4680571042096989, "grad_norm": 6.683748311075554, "learning_rate": 5.7575069926201995e-06, "loss": 17.5805, "step": 25606 }, { "epoch": 0.46807538340614546, "grad_norm": 6.964070926733987, "learning_rate": 5.757214396603275e-06, "loss": 18.0081, "step": 25607 }, { "epoch": 0.468093662602592, "grad_norm": 6.770428775611666, "learning_rate": 5.756921797932381e-06, "loss": 17.1792, "step": 25608 }, { "epoch": 0.46811194179903853, "grad_norm": 5.657804233910794, "learning_rate": 5.756629196608541e-06, "loss": 17.0043, "step": 25609 }, { "epoch": 0.468130220995485, "grad_norm": 5.359336554783504, "learning_rate": 5.756336592632779e-06, "loss": 16.9921, "step": 25610 }, { "epoch": 0.46814850019193155, "grad_norm": 6.278354616573513, "learning_rate": 5.756043986006127e-06, "loss": 17.5775, "step": 25611 }, { "epoch": 0.4681667793883781, "grad_norm": 8.526799560984587, "learning_rate": 5.7557513767296035e-06, "loss": 17.9176, "step": 25612 }, { "epoch": 0.4681850585848246, "grad_norm": 5.399386204186027, "learning_rate": 5.7554587648042384e-06, "loss": 17.0852, "step": 25613 }, { "epoch": 0.46820333778127116, "grad_norm": 8.148357792618592, "learning_rate": 5.755166150231055e-06, "loss": 17.9017, "step": 25614 }, { "epoch": 0.46822161697771764, "grad_norm": 5.188454402782272, "learning_rate": 5.754873533011079e-06, "loss": 17.1485, "step": 25615 }, { "epoch": 0.46823989617416417, "grad_norm": 6.797866173821247, "learning_rate": 5.754580913145339e-06, "loss": 17.8525, "step": 25616 }, { "epoch": 0.4682581753706107, "grad_norm": 6.606843059171352, "learning_rate": 5.754288290634856e-06, "loss": 17.5798, "step": 25617 }, { "epoch": 0.46827645456705724, "grad_norm": 6.558975467364231, "learning_rate": 5.753995665480659e-06, "loss": 17.4101, "step": 25618 }, { "epoch": 0.4682947337635038, "grad_norm": 6.4949767267599166, "learning_rate": 5.753703037683774e-06, "loss": 17.6557, "step": 25619 }, { "epoch": 0.46831301295995026, "grad_norm": 6.863121463205504, "learning_rate": 5.753410407245224e-06, "loss": 17.7778, "step": 25620 }, { "epoch": 0.4683312921563968, "grad_norm": 6.479891724331871, "learning_rate": 5.753117774166036e-06, "loss": 17.263, "step": 25621 }, { "epoch": 0.46834957135284333, "grad_norm": 5.386820293998476, "learning_rate": 5.7528251384472365e-06, "loss": 16.9541, "step": 25622 }, { "epoch": 0.46836785054928987, "grad_norm": 6.605155191840947, "learning_rate": 5.752532500089848e-06, "loss": 17.2899, "step": 25623 }, { "epoch": 0.4683861297457364, "grad_norm": 5.857745284694996, "learning_rate": 5.7522398590948995e-06, "loss": 17.3025, "step": 25624 }, { "epoch": 0.4684044089421829, "grad_norm": 5.650735443579671, "learning_rate": 5.7519472154634174e-06, "loss": 17.2117, "step": 25625 }, { "epoch": 0.4684226881386294, "grad_norm": 7.130100135051834, "learning_rate": 5.751654569196423e-06, "loss": 17.9528, "step": 25626 }, { "epoch": 0.46844096733507595, "grad_norm": 8.278536021762902, "learning_rate": 5.751361920294946e-06, "loss": 18.1778, "step": 25627 }, { "epoch": 0.4684592465315225, "grad_norm": 5.957601076068427, "learning_rate": 5.751069268760008e-06, "loss": 17.4728, "step": 25628 }, { "epoch": 0.468477525727969, "grad_norm": 6.7556101549333585, "learning_rate": 5.750776614592641e-06, "loss": 17.7987, "step": 25629 }, { "epoch": 0.4684958049244155, "grad_norm": 6.83338256167171, "learning_rate": 5.750483957793865e-06, "loss": 17.4064, "step": 25630 }, { "epoch": 0.46851408412086204, "grad_norm": 7.054059252306268, "learning_rate": 5.750191298364709e-06, "loss": 17.7911, "step": 25631 }, { "epoch": 0.4685323633173086, "grad_norm": 7.719577776442061, "learning_rate": 5.749898636306197e-06, "loss": 17.7469, "step": 25632 }, { "epoch": 0.4685506425137551, "grad_norm": 5.074989754595142, "learning_rate": 5.749605971619355e-06, "loss": 16.9432, "step": 25633 }, { "epoch": 0.46856892171020165, "grad_norm": 5.9451183656073985, "learning_rate": 5.74931330430521e-06, "loss": 17.2048, "step": 25634 }, { "epoch": 0.4685872009066481, "grad_norm": 6.742594603455647, "learning_rate": 5.749020634364787e-06, "loss": 17.5849, "step": 25635 }, { "epoch": 0.46860548010309466, "grad_norm": 6.11718331539814, "learning_rate": 5.748727961799111e-06, "loss": 17.7196, "step": 25636 }, { "epoch": 0.4686237592995412, "grad_norm": 6.610508876323689, "learning_rate": 5.7484352866092075e-06, "loss": 17.4668, "step": 25637 }, { "epoch": 0.46864203849598773, "grad_norm": 7.691559971277722, "learning_rate": 5.748142608796105e-06, "loss": 17.7219, "step": 25638 }, { "epoch": 0.4686603176924342, "grad_norm": 6.308453846473192, "learning_rate": 5.747849928360827e-06, "loss": 17.3177, "step": 25639 }, { "epoch": 0.46867859688888075, "grad_norm": 5.393722225054013, "learning_rate": 5.7475572453044e-06, "loss": 17.0605, "step": 25640 }, { "epoch": 0.4686968760853273, "grad_norm": 9.16376237447455, "learning_rate": 5.7472645596278495e-06, "loss": 17.6939, "step": 25641 }, { "epoch": 0.4687151552817738, "grad_norm": 7.240996906399217, "learning_rate": 5.7469718713322024e-06, "loss": 18.1665, "step": 25642 }, { "epoch": 0.46873343447822036, "grad_norm": 7.507610472592406, "learning_rate": 5.7466791804184815e-06, "loss": 18.2205, "step": 25643 }, { "epoch": 0.46875171367466684, "grad_norm": 5.822709102917425, "learning_rate": 5.746386486887718e-06, "loss": 17.2523, "step": 25644 }, { "epoch": 0.4687699928711134, "grad_norm": 6.237261321188969, "learning_rate": 5.746093790740932e-06, "loss": 17.6036, "step": 25645 }, { "epoch": 0.4687882720675599, "grad_norm": 6.15218480557225, "learning_rate": 5.745801091979153e-06, "loss": 17.4224, "step": 25646 }, { "epoch": 0.46880655126400644, "grad_norm": 6.24742142239045, "learning_rate": 5.745508390603405e-06, "loss": 17.301, "step": 25647 }, { "epoch": 0.468824830460453, "grad_norm": 6.127466908515292, "learning_rate": 5.7452156866147155e-06, "loss": 17.361, "step": 25648 }, { "epoch": 0.46884310965689946, "grad_norm": 6.378781447889848, "learning_rate": 5.744922980014111e-06, "loss": 17.5905, "step": 25649 }, { "epoch": 0.468861388853346, "grad_norm": 6.266970334193923, "learning_rate": 5.744630270802614e-06, "loss": 17.6054, "step": 25650 }, { "epoch": 0.46887966804979253, "grad_norm": 5.714962090082019, "learning_rate": 5.744337558981253e-06, "loss": 17.4403, "step": 25651 }, { "epoch": 0.46889794724623907, "grad_norm": 6.36557250001768, "learning_rate": 5.744044844551053e-06, "loss": 17.1341, "step": 25652 }, { "epoch": 0.4689162264426856, "grad_norm": 7.188647325522799, "learning_rate": 5.743752127513042e-06, "loss": 17.8325, "step": 25653 }, { "epoch": 0.4689345056391321, "grad_norm": 7.547680872870857, "learning_rate": 5.743459407868245e-06, "loss": 17.8632, "step": 25654 }, { "epoch": 0.4689527848355786, "grad_norm": 6.92933596374259, "learning_rate": 5.743166685617685e-06, "loss": 17.7693, "step": 25655 }, { "epoch": 0.46897106403202515, "grad_norm": 7.349112223008469, "learning_rate": 5.742873960762392e-06, "loss": 18.1488, "step": 25656 }, { "epoch": 0.4689893432284717, "grad_norm": 6.8542570673454755, "learning_rate": 5.7425812333033884e-06, "loss": 17.6014, "step": 25657 }, { "epoch": 0.4690076224249182, "grad_norm": 6.339414208739852, "learning_rate": 5.742288503241703e-06, "loss": 17.6208, "step": 25658 }, { "epoch": 0.4690259016213647, "grad_norm": 6.584263814777369, "learning_rate": 5.741995770578362e-06, "loss": 17.9069, "step": 25659 }, { "epoch": 0.46904418081781124, "grad_norm": 5.819636579535961, "learning_rate": 5.741703035314388e-06, "loss": 17.5112, "step": 25660 }, { "epoch": 0.4690624600142578, "grad_norm": 6.241513009919817, "learning_rate": 5.74141029745081e-06, "loss": 17.5247, "step": 25661 }, { "epoch": 0.4690807392107043, "grad_norm": 6.614371455467719, "learning_rate": 5.741117556988653e-06, "loss": 17.8962, "step": 25662 }, { "epoch": 0.46909901840715085, "grad_norm": 5.626675146884744, "learning_rate": 5.740824813928945e-06, "loss": 17.2283, "step": 25663 }, { "epoch": 0.4691172976035973, "grad_norm": 6.17983942813688, "learning_rate": 5.740532068272708e-06, "loss": 17.5033, "step": 25664 }, { "epoch": 0.46913557680004386, "grad_norm": 6.929165717542995, "learning_rate": 5.740239320020972e-06, "loss": 17.4919, "step": 25665 }, { "epoch": 0.4691538559964904, "grad_norm": 7.643820412534104, "learning_rate": 5.739946569174761e-06, "loss": 18.0734, "step": 25666 }, { "epoch": 0.46917213519293693, "grad_norm": 6.86572819422133, "learning_rate": 5.739653815735101e-06, "loss": 17.5621, "step": 25667 }, { "epoch": 0.46919041438938347, "grad_norm": 6.634938317137737, "learning_rate": 5.7393610597030195e-06, "loss": 17.7162, "step": 25668 }, { "epoch": 0.46920869358582995, "grad_norm": 5.9285304036871045, "learning_rate": 5.739068301079539e-06, "loss": 17.4825, "step": 25669 }, { "epoch": 0.4692269727822765, "grad_norm": 6.362775910391573, "learning_rate": 5.738775539865692e-06, "loss": 17.9573, "step": 25670 }, { "epoch": 0.469245251978723, "grad_norm": 5.632427385560699, "learning_rate": 5.738482776062499e-06, "loss": 17.141, "step": 25671 }, { "epoch": 0.46926353117516956, "grad_norm": 5.64359341938006, "learning_rate": 5.738190009670987e-06, "loss": 17.3118, "step": 25672 }, { "epoch": 0.46928181037161604, "grad_norm": 7.628796705740088, "learning_rate": 5.737897240692185e-06, "loss": 18.1154, "step": 25673 }, { "epoch": 0.4693000895680626, "grad_norm": 6.6720638000211006, "learning_rate": 5.737604469127116e-06, "loss": 17.6352, "step": 25674 }, { "epoch": 0.4693183687645091, "grad_norm": 7.118285106718248, "learning_rate": 5.737311694976807e-06, "loss": 17.8439, "step": 25675 }, { "epoch": 0.46933664796095564, "grad_norm": 6.679490191867098, "learning_rate": 5.737018918242285e-06, "loss": 17.4833, "step": 25676 }, { "epoch": 0.4693549271574022, "grad_norm": 6.56219098604823, "learning_rate": 5.7367261389245765e-06, "loss": 17.4497, "step": 25677 }, { "epoch": 0.46937320635384866, "grad_norm": 5.93748186058725, "learning_rate": 5.736433357024706e-06, "loss": 17.2334, "step": 25678 }, { "epoch": 0.4693914855502952, "grad_norm": 5.548583926147259, "learning_rate": 5.7361405725437005e-06, "loss": 17.118, "step": 25679 }, { "epoch": 0.46940976474674173, "grad_norm": 7.320166665768787, "learning_rate": 5.735847785482587e-06, "loss": 17.8548, "step": 25680 }, { "epoch": 0.46942804394318827, "grad_norm": 6.010595277612539, "learning_rate": 5.73555499584239e-06, "loss": 16.8336, "step": 25681 }, { "epoch": 0.4694463231396348, "grad_norm": 5.908111636604885, "learning_rate": 5.7352622036241356e-06, "loss": 17.4006, "step": 25682 }, { "epoch": 0.4694646023360813, "grad_norm": 8.56743637344712, "learning_rate": 5.734969408828852e-06, "loss": 18.1486, "step": 25683 }, { "epoch": 0.4694828815325278, "grad_norm": 8.068802025596723, "learning_rate": 5.734676611457566e-06, "loss": 18.1027, "step": 25684 }, { "epoch": 0.46950116072897435, "grad_norm": 6.568334598477962, "learning_rate": 5.734383811511301e-06, "loss": 17.4182, "step": 25685 }, { "epoch": 0.4695194399254209, "grad_norm": 6.602268560068675, "learning_rate": 5.734091008991083e-06, "loss": 17.3948, "step": 25686 }, { "epoch": 0.4695377191218674, "grad_norm": 8.575644626828332, "learning_rate": 5.733798203897941e-06, "loss": 18.5066, "step": 25687 }, { "epoch": 0.4695559983183139, "grad_norm": 5.9666038254173435, "learning_rate": 5.7335053962329e-06, "loss": 17.3666, "step": 25688 }, { "epoch": 0.46957427751476044, "grad_norm": 7.61695503473196, "learning_rate": 5.7332125859969856e-06, "loss": 17.5837, "step": 25689 }, { "epoch": 0.469592556711207, "grad_norm": 6.1250613603963835, "learning_rate": 5.7329197731912256e-06, "loss": 17.5394, "step": 25690 }, { "epoch": 0.4696108359076535, "grad_norm": 6.446822408970243, "learning_rate": 5.732626957816645e-06, "loss": 17.4589, "step": 25691 }, { "epoch": 0.46962911510410005, "grad_norm": 5.94007934733352, "learning_rate": 5.7323341398742706e-06, "loss": 17.2912, "step": 25692 }, { "epoch": 0.46964739430054653, "grad_norm": 6.384094508784263, "learning_rate": 5.732041319365128e-06, "loss": 17.5478, "step": 25693 }, { "epoch": 0.46966567349699306, "grad_norm": 7.089703335800537, "learning_rate": 5.731748496290246e-06, "loss": 17.5673, "step": 25694 }, { "epoch": 0.4696839526934396, "grad_norm": 4.857902234470191, "learning_rate": 5.731455670650647e-06, "loss": 16.9854, "step": 25695 }, { "epoch": 0.46970223188988613, "grad_norm": 6.634609408981984, "learning_rate": 5.731162842447359e-06, "loss": 17.2928, "step": 25696 }, { "epoch": 0.46972051108633267, "grad_norm": 6.032974300249462, "learning_rate": 5.730870011681411e-06, "loss": 17.3983, "step": 25697 }, { "epoch": 0.46973879028277915, "grad_norm": 5.073998822564762, "learning_rate": 5.730577178353824e-06, "loss": 16.9713, "step": 25698 }, { "epoch": 0.4697570694792257, "grad_norm": 6.252144364596876, "learning_rate": 5.7302843424656294e-06, "loss": 17.2702, "step": 25699 }, { "epoch": 0.4697753486756722, "grad_norm": 6.534455522100211, "learning_rate": 5.729991504017851e-06, "loss": 17.7289, "step": 25700 }, { "epoch": 0.46979362787211876, "grad_norm": 7.048492106480271, "learning_rate": 5.729698663011516e-06, "loss": 17.7036, "step": 25701 }, { "epoch": 0.4698119070685653, "grad_norm": 7.787887934353227, "learning_rate": 5.729405819447651e-06, "loss": 17.8927, "step": 25702 }, { "epoch": 0.4698301862650118, "grad_norm": 6.843305471009764, "learning_rate": 5.729112973327281e-06, "loss": 17.6049, "step": 25703 }, { "epoch": 0.4698484654614583, "grad_norm": 6.609894127921328, "learning_rate": 5.728820124651435e-06, "loss": 17.4264, "step": 25704 }, { "epoch": 0.46986674465790484, "grad_norm": 5.988494520884227, "learning_rate": 5.728527273421135e-06, "loss": 17.3569, "step": 25705 }, { "epoch": 0.4698850238543514, "grad_norm": 6.828862597846488, "learning_rate": 5.728234419637411e-06, "loss": 17.5556, "step": 25706 }, { "epoch": 0.46990330305079786, "grad_norm": 7.474874776494301, "learning_rate": 5.7279415633012895e-06, "loss": 18.2851, "step": 25707 }, { "epoch": 0.4699215822472444, "grad_norm": 7.734436070776567, "learning_rate": 5.7276487044137964e-06, "loss": 18.4307, "step": 25708 }, { "epoch": 0.46993986144369093, "grad_norm": 7.963822028095857, "learning_rate": 5.727355842975956e-06, "loss": 18.0831, "step": 25709 }, { "epoch": 0.46995814064013747, "grad_norm": 7.74876160533145, "learning_rate": 5.7270629789887966e-06, "loss": 17.859, "step": 25710 }, { "epoch": 0.469976419836584, "grad_norm": 5.603722211222619, "learning_rate": 5.7267701124533455e-06, "loss": 17.1839, "step": 25711 }, { "epoch": 0.4699946990330305, "grad_norm": 7.350472567641458, "learning_rate": 5.7264772433706284e-06, "loss": 17.7715, "step": 25712 }, { "epoch": 0.470012978229477, "grad_norm": 8.48293326398749, "learning_rate": 5.726184371741671e-06, "loss": 17.9233, "step": 25713 }, { "epoch": 0.47003125742592355, "grad_norm": 7.3769601066609365, "learning_rate": 5.7258914975675e-06, "loss": 18.0175, "step": 25714 }, { "epoch": 0.4700495366223701, "grad_norm": 5.47695415709752, "learning_rate": 5.725598620849144e-06, "loss": 16.9339, "step": 25715 }, { "epoch": 0.4700678158188166, "grad_norm": 6.340040965722052, "learning_rate": 5.725305741587627e-06, "loss": 17.6432, "step": 25716 }, { "epoch": 0.4700860950152631, "grad_norm": 6.050953636640179, "learning_rate": 5.725012859783975e-06, "loss": 17.3825, "step": 25717 }, { "epoch": 0.47010437421170964, "grad_norm": 6.8669465037179975, "learning_rate": 5.724719975439217e-06, "loss": 17.7668, "step": 25718 }, { "epoch": 0.4701226534081562, "grad_norm": 5.122800605776736, "learning_rate": 5.724427088554379e-06, "loss": 17.0787, "step": 25719 }, { "epoch": 0.4701409326046027, "grad_norm": 6.818098158107216, "learning_rate": 5.7241341991304855e-06, "loss": 17.5973, "step": 25720 }, { "epoch": 0.47015921180104925, "grad_norm": 7.719653622645311, "learning_rate": 5.723841307168565e-06, "loss": 17.8661, "step": 25721 }, { "epoch": 0.47017749099749573, "grad_norm": 7.152130450960114, "learning_rate": 5.723548412669644e-06, "loss": 17.8253, "step": 25722 }, { "epoch": 0.47019577019394226, "grad_norm": 8.988701786963508, "learning_rate": 5.723255515634747e-06, "loss": 17.6978, "step": 25723 }, { "epoch": 0.4702140493903888, "grad_norm": 5.22453220394301, "learning_rate": 5.722962616064903e-06, "loss": 17.0157, "step": 25724 }, { "epoch": 0.47023232858683534, "grad_norm": 5.863474360587279, "learning_rate": 5.72266971396114e-06, "loss": 17.3891, "step": 25725 }, { "epoch": 0.47025060778328187, "grad_norm": 8.334234473614273, "learning_rate": 5.72237680932448e-06, "loss": 18.2978, "step": 25726 }, { "epoch": 0.47026888697972835, "grad_norm": 7.895708930511546, "learning_rate": 5.722083902155952e-06, "loss": 18.1887, "step": 25727 }, { "epoch": 0.4702871661761749, "grad_norm": 5.5779541338091025, "learning_rate": 5.7217909924565825e-06, "loss": 17.3675, "step": 25728 }, { "epoch": 0.4703054453726214, "grad_norm": 7.120368008290413, "learning_rate": 5.7214980802274e-06, "loss": 17.6981, "step": 25729 }, { "epoch": 0.47032372456906796, "grad_norm": 6.318338417013921, "learning_rate": 5.721205165469428e-06, "loss": 17.3642, "step": 25730 }, { "epoch": 0.4703420037655145, "grad_norm": 6.455983042945042, "learning_rate": 5.7209122481836944e-06, "loss": 17.5759, "step": 25731 }, { "epoch": 0.470360282961961, "grad_norm": 7.351994669905593, "learning_rate": 5.720619328371226e-06, "loss": 18.0706, "step": 25732 }, { "epoch": 0.4703785621584075, "grad_norm": 6.5837840471987, "learning_rate": 5.72032640603305e-06, "loss": 17.7664, "step": 25733 }, { "epoch": 0.47039684135485405, "grad_norm": 8.831748245630628, "learning_rate": 5.720033481170192e-06, "loss": 18.2569, "step": 25734 }, { "epoch": 0.4704151205513006, "grad_norm": 6.8952771330461395, "learning_rate": 5.719740553783679e-06, "loss": 17.6438, "step": 25735 }, { "epoch": 0.4704333997477471, "grad_norm": 5.684297049286084, "learning_rate": 5.719447623874539e-06, "loss": 17.1094, "step": 25736 }, { "epoch": 0.4704516789441936, "grad_norm": 6.4742925462084004, "learning_rate": 5.719154691443795e-06, "loss": 17.4488, "step": 25737 }, { "epoch": 0.47046995814064013, "grad_norm": 4.955174616071838, "learning_rate": 5.718861756492478e-06, "loss": 17.0022, "step": 25738 }, { "epoch": 0.47048823733708667, "grad_norm": 6.669365353904177, "learning_rate": 5.718568819021612e-06, "loss": 17.7681, "step": 25739 }, { "epoch": 0.4705065165335332, "grad_norm": 6.576506940733695, "learning_rate": 5.718275879032226e-06, "loss": 17.3518, "step": 25740 }, { "epoch": 0.4705247957299797, "grad_norm": 6.843285743590159, "learning_rate": 5.717982936525345e-06, "loss": 17.7304, "step": 25741 }, { "epoch": 0.4705430749264262, "grad_norm": 6.1425211194349005, "learning_rate": 5.717689991501996e-06, "loss": 17.572, "step": 25742 }, { "epoch": 0.47056135412287275, "grad_norm": 6.259091147614705, "learning_rate": 5.7173970439632044e-06, "loss": 17.2392, "step": 25743 }, { "epoch": 0.4705796333193193, "grad_norm": 6.56324687763438, "learning_rate": 5.71710409391e-06, "loss": 17.699, "step": 25744 }, { "epoch": 0.4705979125157658, "grad_norm": 6.4870259587971075, "learning_rate": 5.716811141343407e-06, "loss": 17.575, "step": 25745 }, { "epoch": 0.4706161917122123, "grad_norm": 7.0280958866553185, "learning_rate": 5.716518186264454e-06, "loss": 17.7948, "step": 25746 }, { "epoch": 0.47063447090865884, "grad_norm": 7.43835303131793, "learning_rate": 5.716225228674166e-06, "loss": 17.4661, "step": 25747 }, { "epoch": 0.4706527501051054, "grad_norm": 5.680874544795799, "learning_rate": 5.715932268573572e-06, "loss": 17.3705, "step": 25748 }, { "epoch": 0.4706710293015519, "grad_norm": 11.223624627611429, "learning_rate": 5.715639305963697e-06, "loss": 18.3935, "step": 25749 }, { "epoch": 0.47068930849799845, "grad_norm": 7.106054349598446, "learning_rate": 5.715346340845568e-06, "loss": 17.6383, "step": 25750 }, { "epoch": 0.47070758769444493, "grad_norm": 5.927422855884639, "learning_rate": 5.7150533732202115e-06, "loss": 17.346, "step": 25751 }, { "epoch": 0.47072586689089146, "grad_norm": 6.295244208832737, "learning_rate": 5.714760403088656e-06, "loss": 17.5507, "step": 25752 }, { "epoch": 0.470744146087338, "grad_norm": 9.004936621510755, "learning_rate": 5.7144674304519275e-06, "loss": 17.6309, "step": 25753 }, { "epoch": 0.47076242528378454, "grad_norm": 8.922715486165398, "learning_rate": 5.714174455311052e-06, "loss": 17.9797, "step": 25754 }, { "epoch": 0.47078070448023107, "grad_norm": 7.6902900504639415, "learning_rate": 5.713881477667057e-06, "loss": 18.4467, "step": 25755 }, { "epoch": 0.47079898367667755, "grad_norm": 10.561340904277566, "learning_rate": 5.713588497520971e-06, "loss": 18.5204, "step": 25756 }, { "epoch": 0.4708172628731241, "grad_norm": 6.6134355482343175, "learning_rate": 5.7132955148738174e-06, "loss": 17.7061, "step": 25757 }, { "epoch": 0.4708355420695706, "grad_norm": 6.8893242974843885, "learning_rate": 5.713002529726626e-06, "loss": 17.7588, "step": 25758 }, { "epoch": 0.47085382126601716, "grad_norm": 7.474456619203395, "learning_rate": 5.712709542080421e-06, "loss": 18.0075, "step": 25759 }, { "epoch": 0.4708721004624637, "grad_norm": 6.948855013586783, "learning_rate": 5.712416551936232e-06, "loss": 17.8254, "step": 25760 }, { "epoch": 0.4708903796589102, "grad_norm": 6.50457562766823, "learning_rate": 5.7121235592950855e-06, "loss": 17.751, "step": 25761 }, { "epoch": 0.4709086588553567, "grad_norm": 5.83894267974012, "learning_rate": 5.711830564158006e-06, "loss": 17.2684, "step": 25762 }, { "epoch": 0.47092693805180325, "grad_norm": 7.653457693003221, "learning_rate": 5.711537566526024e-06, "loss": 17.9138, "step": 25763 }, { "epoch": 0.4709452172482498, "grad_norm": 5.1323006430626785, "learning_rate": 5.711244566400163e-06, "loss": 16.9562, "step": 25764 }, { "epoch": 0.4709634964446963, "grad_norm": 8.25864421776104, "learning_rate": 5.710951563781452e-06, "loss": 17.7398, "step": 25765 }, { "epoch": 0.4709817756411428, "grad_norm": 7.69456264534389, "learning_rate": 5.710658558670919e-06, "loss": 18.1504, "step": 25766 }, { "epoch": 0.47100005483758933, "grad_norm": 6.088654055819759, "learning_rate": 5.710365551069588e-06, "loss": 17.5746, "step": 25767 }, { "epoch": 0.47101833403403587, "grad_norm": 5.194843577707228, "learning_rate": 5.710072540978487e-06, "loss": 17.1521, "step": 25768 }, { "epoch": 0.4710366132304824, "grad_norm": 6.697547190304205, "learning_rate": 5.709779528398643e-06, "loss": 17.759, "step": 25769 }, { "epoch": 0.47105489242692894, "grad_norm": 6.429279863825913, "learning_rate": 5.709486513331085e-06, "loss": 17.1669, "step": 25770 }, { "epoch": 0.4710731716233754, "grad_norm": 6.441112590700953, "learning_rate": 5.709193495776837e-06, "loss": 17.7022, "step": 25771 }, { "epoch": 0.47109145081982196, "grad_norm": 6.625448028730175, "learning_rate": 5.708900475736928e-06, "loss": 17.4705, "step": 25772 }, { "epoch": 0.4711097300162685, "grad_norm": 6.305411184269652, "learning_rate": 5.708607453212385e-06, "loss": 17.5364, "step": 25773 }, { "epoch": 0.471128009212715, "grad_norm": 6.290464556869714, "learning_rate": 5.708314428204233e-06, "loss": 17.5009, "step": 25774 }, { "epoch": 0.4711462884091615, "grad_norm": 7.906891643381622, "learning_rate": 5.708021400713501e-06, "loss": 17.8957, "step": 25775 }, { "epoch": 0.47116456760560804, "grad_norm": 6.183705673203319, "learning_rate": 5.707728370741215e-06, "loss": 17.5608, "step": 25776 }, { "epoch": 0.4711828468020546, "grad_norm": 7.540136204940559, "learning_rate": 5.707435338288403e-06, "loss": 18.1934, "step": 25777 }, { "epoch": 0.4712011259985011, "grad_norm": 6.791809613177821, "learning_rate": 5.707142303356092e-06, "loss": 17.4112, "step": 25778 }, { "epoch": 0.47121940519494765, "grad_norm": 5.89670348103306, "learning_rate": 5.706849265945307e-06, "loss": 17.3449, "step": 25779 }, { "epoch": 0.47123768439139413, "grad_norm": 7.1241957625525085, "learning_rate": 5.706556226057078e-06, "loss": 17.7355, "step": 25780 }, { "epoch": 0.47125596358784066, "grad_norm": 7.153960642312598, "learning_rate": 5.706263183692431e-06, "loss": 17.6717, "step": 25781 }, { "epoch": 0.4712742427842872, "grad_norm": 6.438701194873751, "learning_rate": 5.70597013885239e-06, "loss": 17.5661, "step": 25782 }, { "epoch": 0.47129252198073374, "grad_norm": 7.390245278411702, "learning_rate": 5.705677091537988e-06, "loss": 18.0024, "step": 25783 }, { "epoch": 0.47131080117718027, "grad_norm": 5.88308197225255, "learning_rate": 5.705384041750249e-06, "loss": 17.3385, "step": 25784 }, { "epoch": 0.47132908037362675, "grad_norm": 5.560797798888823, "learning_rate": 5.705090989490199e-06, "loss": 17.2345, "step": 25785 }, { "epoch": 0.4713473595700733, "grad_norm": 6.386049160659695, "learning_rate": 5.7047979347588665e-06, "loss": 17.8416, "step": 25786 }, { "epoch": 0.4713656387665198, "grad_norm": 6.9627073599178715, "learning_rate": 5.704504877557279e-06, "loss": 18.0885, "step": 25787 }, { "epoch": 0.47138391796296636, "grad_norm": 6.204843270334688, "learning_rate": 5.704211817886463e-06, "loss": 17.7679, "step": 25788 }, { "epoch": 0.4714021971594129, "grad_norm": 6.480918566703043, "learning_rate": 5.703918755747444e-06, "loss": 17.6984, "step": 25789 }, { "epoch": 0.4714204763558594, "grad_norm": 7.127484284389984, "learning_rate": 5.703625691141252e-06, "loss": 17.9324, "step": 25790 }, { "epoch": 0.4714387555523059, "grad_norm": 6.2940842817535, "learning_rate": 5.703332624068914e-06, "loss": 17.2779, "step": 25791 }, { "epoch": 0.47145703474875245, "grad_norm": 6.574987717773518, "learning_rate": 5.703039554531454e-06, "loss": 17.7116, "step": 25792 }, { "epoch": 0.471475313945199, "grad_norm": 7.253700012881749, "learning_rate": 5.7027464825299024e-06, "loss": 17.8144, "step": 25793 }, { "epoch": 0.4714935931416455, "grad_norm": 6.939616955610362, "learning_rate": 5.702453408065287e-06, "loss": 17.7171, "step": 25794 }, { "epoch": 0.471511872338092, "grad_norm": 7.077047832098274, "learning_rate": 5.702160331138632e-06, "loss": 17.4779, "step": 25795 }, { "epoch": 0.47153015153453853, "grad_norm": 8.067816705056266, "learning_rate": 5.7018672517509645e-06, "loss": 18.0141, "step": 25796 }, { "epoch": 0.47154843073098507, "grad_norm": 4.89697354386606, "learning_rate": 5.7015741699033144e-06, "loss": 16.8199, "step": 25797 }, { "epoch": 0.4715667099274316, "grad_norm": 6.599982962770622, "learning_rate": 5.701281085596709e-06, "loss": 17.2145, "step": 25798 }, { "epoch": 0.47158498912387814, "grad_norm": 6.756677056868582, "learning_rate": 5.700987998832173e-06, "loss": 17.5423, "step": 25799 }, { "epoch": 0.4716032683203246, "grad_norm": 6.4648199917368325, "learning_rate": 5.700694909610736e-06, "loss": 17.7403, "step": 25800 }, { "epoch": 0.47162154751677116, "grad_norm": 6.397784271903643, "learning_rate": 5.700401817933422e-06, "loss": 17.6628, "step": 25801 }, { "epoch": 0.4716398267132177, "grad_norm": 5.149442698610189, "learning_rate": 5.700108723801262e-06, "loss": 17.0186, "step": 25802 }, { "epoch": 0.4716581059096642, "grad_norm": 7.4748213213920796, "learning_rate": 5.69981562721528e-06, "loss": 17.7124, "step": 25803 }, { "epoch": 0.47167638510611076, "grad_norm": 7.234924381618311, "learning_rate": 5.699522528176506e-06, "loss": 18.0197, "step": 25804 }, { "epoch": 0.47169466430255724, "grad_norm": 6.348990477810652, "learning_rate": 5.699229426685967e-06, "loss": 17.305, "step": 25805 }, { "epoch": 0.4717129434990038, "grad_norm": 5.804665038058139, "learning_rate": 5.698936322744689e-06, "loss": 17.1312, "step": 25806 }, { "epoch": 0.4717312226954503, "grad_norm": 5.685403952588046, "learning_rate": 5.6986432163537e-06, "loss": 17.1033, "step": 25807 }, { "epoch": 0.47174950189189685, "grad_norm": 5.6113171001790985, "learning_rate": 5.698350107514028e-06, "loss": 17.089, "step": 25808 }, { "epoch": 0.47176778108834333, "grad_norm": 6.010913038377717, "learning_rate": 5.698056996226697e-06, "loss": 17.4232, "step": 25809 }, { "epoch": 0.47178606028478987, "grad_norm": 7.38772038955717, "learning_rate": 5.6977638824927385e-06, "loss": 17.7902, "step": 25810 }, { "epoch": 0.4718043394812364, "grad_norm": 6.259717543016462, "learning_rate": 5.697470766313179e-06, "loss": 17.4974, "step": 25811 }, { "epoch": 0.47182261867768294, "grad_norm": 7.24695507568173, "learning_rate": 5.697177647689044e-06, "loss": 18.066, "step": 25812 }, { "epoch": 0.4718408978741295, "grad_norm": 6.138165958681334, "learning_rate": 5.696884526621361e-06, "loss": 17.2708, "step": 25813 }, { "epoch": 0.47185917707057595, "grad_norm": 5.908285647364228, "learning_rate": 5.696591403111158e-06, "loss": 17.0526, "step": 25814 }, { "epoch": 0.4718774562670225, "grad_norm": 5.9555887823675135, "learning_rate": 5.6962982771594645e-06, "loss": 17.3364, "step": 25815 }, { "epoch": 0.471895735463469, "grad_norm": 6.547020115876095, "learning_rate": 5.696005148767305e-06, "loss": 17.6849, "step": 25816 }, { "epoch": 0.47191401465991556, "grad_norm": 5.5296341109417115, "learning_rate": 5.695712017935707e-06, "loss": 17.2958, "step": 25817 }, { "epoch": 0.4719322938563621, "grad_norm": 5.914017653103802, "learning_rate": 5.6954188846657e-06, "loss": 17.355, "step": 25818 }, { "epoch": 0.4719505730528086, "grad_norm": 6.347253209844765, "learning_rate": 5.69512574895831e-06, "loss": 17.6088, "step": 25819 }, { "epoch": 0.4719688522492551, "grad_norm": 6.8219558055636105, "learning_rate": 5.694832610814565e-06, "loss": 17.7155, "step": 25820 }, { "epoch": 0.47198713144570165, "grad_norm": 6.475568929341647, "learning_rate": 5.694539470235491e-06, "loss": 17.4291, "step": 25821 }, { "epoch": 0.4720054106421482, "grad_norm": 6.238996419924572, "learning_rate": 5.694246327222117e-06, "loss": 17.3621, "step": 25822 }, { "epoch": 0.4720236898385947, "grad_norm": 5.822179323273069, "learning_rate": 5.693953181775469e-06, "loss": 17.1884, "step": 25823 }, { "epoch": 0.4720419690350412, "grad_norm": 7.282504982699104, "learning_rate": 5.6936600338965755e-06, "loss": 17.8156, "step": 25824 }, { "epoch": 0.47206024823148773, "grad_norm": 6.595031253693095, "learning_rate": 5.693366883586465e-06, "loss": 17.5496, "step": 25825 }, { "epoch": 0.47207852742793427, "grad_norm": 6.624650194623883, "learning_rate": 5.693073730846162e-06, "loss": 17.6315, "step": 25826 }, { "epoch": 0.4720968066243808, "grad_norm": 6.576914094424634, "learning_rate": 5.6927805756766975e-06, "loss": 17.4683, "step": 25827 }, { "epoch": 0.47211508582082734, "grad_norm": 6.415511974670059, "learning_rate": 5.692487418079096e-06, "loss": 17.4982, "step": 25828 }, { "epoch": 0.4721333650172738, "grad_norm": 6.482107892871176, "learning_rate": 5.692194258054387e-06, "loss": 17.8158, "step": 25829 }, { "epoch": 0.47215164421372036, "grad_norm": 6.574487703205978, "learning_rate": 5.691901095603596e-06, "loss": 17.4369, "step": 25830 }, { "epoch": 0.4721699234101669, "grad_norm": 8.795784141479906, "learning_rate": 5.6916079307277525e-06, "loss": 17.8158, "step": 25831 }, { "epoch": 0.4721882026066134, "grad_norm": 8.248989669523736, "learning_rate": 5.691314763427883e-06, "loss": 17.8837, "step": 25832 }, { "epoch": 0.47220648180305996, "grad_norm": 6.722758123023115, "learning_rate": 5.691021593705015e-06, "loss": 17.8966, "step": 25833 }, { "epoch": 0.47222476099950644, "grad_norm": 7.033595405632596, "learning_rate": 5.690728421560177e-06, "loss": 17.7698, "step": 25834 }, { "epoch": 0.472243040195953, "grad_norm": 5.509576586719086, "learning_rate": 5.690435246994397e-06, "loss": 17.2702, "step": 25835 }, { "epoch": 0.4722613193923995, "grad_norm": 6.454036418745367, "learning_rate": 5.6901420700086985e-06, "loss": 17.6068, "step": 25836 }, { "epoch": 0.47227959858884605, "grad_norm": 6.5781206231900065, "learning_rate": 5.689848890604113e-06, "loss": 17.5991, "step": 25837 }, { "epoch": 0.4722978777852926, "grad_norm": 6.2061747849983675, "learning_rate": 5.689555708781667e-06, "loss": 17.3686, "step": 25838 }, { "epoch": 0.47231615698173907, "grad_norm": 5.537459395375557, "learning_rate": 5.689262524542389e-06, "loss": 17.2471, "step": 25839 }, { "epoch": 0.4723344361781856, "grad_norm": 6.814166713417278, "learning_rate": 5.688969337887305e-06, "loss": 17.7222, "step": 25840 }, { "epoch": 0.47235271537463214, "grad_norm": 6.132540921005035, "learning_rate": 5.688676148817442e-06, "loss": 17.0324, "step": 25841 }, { "epoch": 0.4723709945710787, "grad_norm": 5.9814842542380395, "learning_rate": 5.688382957333831e-06, "loss": 17.4957, "step": 25842 }, { "epoch": 0.47238927376752515, "grad_norm": 6.635167267324463, "learning_rate": 5.688089763437498e-06, "loss": 17.6083, "step": 25843 }, { "epoch": 0.4724075529639717, "grad_norm": 6.772207805176339, "learning_rate": 5.687796567129468e-06, "loss": 18.0892, "step": 25844 }, { "epoch": 0.4724258321604182, "grad_norm": 6.155238518836094, "learning_rate": 5.687503368410772e-06, "loss": 17.3286, "step": 25845 }, { "epoch": 0.47244411135686476, "grad_norm": 6.557655040142976, "learning_rate": 5.687210167282435e-06, "loss": 17.805, "step": 25846 }, { "epoch": 0.4724623905533113, "grad_norm": 5.832099023514787, "learning_rate": 5.686916963745487e-06, "loss": 17.4085, "step": 25847 }, { "epoch": 0.4724806697497578, "grad_norm": 5.806232684173829, "learning_rate": 5.686623757800954e-06, "loss": 17.4479, "step": 25848 }, { "epoch": 0.4724989489462043, "grad_norm": 8.043928830639969, "learning_rate": 5.686330549449866e-06, "loss": 17.8331, "step": 25849 }, { "epoch": 0.47251722814265085, "grad_norm": 6.535933902088941, "learning_rate": 5.6860373386932465e-06, "loss": 17.6848, "step": 25850 }, { "epoch": 0.4725355073390974, "grad_norm": 7.168594583730302, "learning_rate": 5.685744125532127e-06, "loss": 17.5545, "step": 25851 }, { "epoch": 0.4725537865355439, "grad_norm": 5.374707794732045, "learning_rate": 5.685450909967533e-06, "loss": 17.1791, "step": 25852 }, { "epoch": 0.4725720657319904, "grad_norm": 6.280479088854206, "learning_rate": 5.685157692000494e-06, "loss": 17.7383, "step": 25853 }, { "epoch": 0.47259034492843693, "grad_norm": 6.757402852042405, "learning_rate": 5.6848644716320365e-06, "loss": 17.4559, "step": 25854 }, { "epoch": 0.47260862412488347, "grad_norm": 6.907323659330623, "learning_rate": 5.684571248863188e-06, "loss": 17.5365, "step": 25855 }, { "epoch": 0.47262690332133, "grad_norm": 6.153775305233588, "learning_rate": 5.684278023694978e-06, "loss": 17.3031, "step": 25856 }, { "epoch": 0.47264518251777654, "grad_norm": 13.665868608285148, "learning_rate": 5.683984796128432e-06, "loss": 18.349, "step": 25857 }, { "epoch": 0.472663461714223, "grad_norm": 6.817935297092144, "learning_rate": 5.683691566164579e-06, "loss": 17.463, "step": 25858 }, { "epoch": 0.47268174091066956, "grad_norm": 6.551114817335246, "learning_rate": 5.683398333804446e-06, "loss": 17.8015, "step": 25859 }, { "epoch": 0.4727000201071161, "grad_norm": 5.18392331601108, "learning_rate": 5.683105099049061e-06, "loss": 17.0122, "step": 25860 }, { "epoch": 0.47271829930356263, "grad_norm": 6.0659547084974585, "learning_rate": 5.682811861899452e-06, "loss": 17.4556, "step": 25861 }, { "epoch": 0.47273657850000916, "grad_norm": 6.1119645321856195, "learning_rate": 5.682518622356647e-06, "loss": 17.3908, "step": 25862 }, { "epoch": 0.47275485769645564, "grad_norm": 7.284642014411717, "learning_rate": 5.682225380421674e-06, "loss": 17.9496, "step": 25863 }, { "epoch": 0.4727731368929022, "grad_norm": 6.929019893916304, "learning_rate": 5.681932136095558e-06, "loss": 17.4113, "step": 25864 }, { "epoch": 0.4727914160893487, "grad_norm": 6.6258985334680025, "learning_rate": 5.681638889379331e-06, "loss": 17.316, "step": 25865 }, { "epoch": 0.47280969528579525, "grad_norm": 5.242997829500244, "learning_rate": 5.681345640274018e-06, "loss": 16.9227, "step": 25866 }, { "epoch": 0.4728279744822418, "grad_norm": 6.577576620433062, "learning_rate": 5.681052388780649e-06, "loss": 17.6209, "step": 25867 }, { "epoch": 0.47284625367868827, "grad_norm": 5.71879287588394, "learning_rate": 5.680759134900249e-06, "loss": 17.5261, "step": 25868 }, { "epoch": 0.4728645328751348, "grad_norm": 6.196560471184433, "learning_rate": 5.680465878633848e-06, "loss": 17.5856, "step": 25869 }, { "epoch": 0.47288281207158134, "grad_norm": 6.68080064698184, "learning_rate": 5.680172619982474e-06, "loss": 17.7189, "step": 25870 }, { "epoch": 0.4729010912680279, "grad_norm": 6.161544304228171, "learning_rate": 5.6798793589471526e-06, "loss": 17.308, "step": 25871 }, { "epoch": 0.4729193704644744, "grad_norm": 8.332024594696001, "learning_rate": 5.679586095528913e-06, "loss": 17.9046, "step": 25872 }, { "epoch": 0.4729376496609209, "grad_norm": 12.59299099131102, "learning_rate": 5.679292829728783e-06, "loss": 18.5357, "step": 25873 }, { "epoch": 0.4729559288573674, "grad_norm": 7.905383411209632, "learning_rate": 5.678999561547791e-06, "loss": 18.2884, "step": 25874 }, { "epoch": 0.47297420805381396, "grad_norm": 6.661492886445078, "learning_rate": 5.678706290986964e-06, "loss": 17.5146, "step": 25875 }, { "epoch": 0.4729924872502605, "grad_norm": 4.781496507417505, "learning_rate": 5.678413018047331e-06, "loss": 16.9854, "step": 25876 }, { "epoch": 0.473010766446707, "grad_norm": 7.208361767350122, "learning_rate": 5.678119742729919e-06, "loss": 17.4394, "step": 25877 }, { "epoch": 0.4730290456431535, "grad_norm": 6.994738151235073, "learning_rate": 5.6778264650357565e-06, "loss": 17.588, "step": 25878 }, { "epoch": 0.47304732483960005, "grad_norm": 7.306192444508178, "learning_rate": 5.677533184965871e-06, "loss": 17.7977, "step": 25879 }, { "epoch": 0.4730656040360466, "grad_norm": 6.0981017799010075, "learning_rate": 5.677239902521291e-06, "loss": 17.4439, "step": 25880 }, { "epoch": 0.4730838832324931, "grad_norm": 6.438982786471811, "learning_rate": 5.676946617703043e-06, "loss": 17.5578, "step": 25881 }, { "epoch": 0.4731021624289396, "grad_norm": 6.134441006378765, "learning_rate": 5.676653330512155e-06, "loss": 17.29, "step": 25882 }, { "epoch": 0.47312044162538613, "grad_norm": 6.184028519930327, "learning_rate": 5.676360040949656e-06, "loss": 17.6106, "step": 25883 }, { "epoch": 0.47313872082183267, "grad_norm": 6.186597448277817, "learning_rate": 5.676066749016577e-06, "loss": 17.1757, "step": 25884 }, { "epoch": 0.4731570000182792, "grad_norm": 6.887298996952183, "learning_rate": 5.67577345471394e-06, "loss": 17.5008, "step": 25885 }, { "epoch": 0.47317527921472574, "grad_norm": 5.17511996340504, "learning_rate": 5.6754801580427755e-06, "loss": 17.1795, "step": 25886 }, { "epoch": 0.4731935584111722, "grad_norm": 7.466202467595964, "learning_rate": 5.6751868590041125e-06, "loss": 17.8165, "step": 25887 }, { "epoch": 0.47321183760761876, "grad_norm": 7.265150142125658, "learning_rate": 5.6748935575989775e-06, "loss": 17.9262, "step": 25888 }, { "epoch": 0.4732301168040653, "grad_norm": 7.85296419328889, "learning_rate": 5.6746002538284e-06, "loss": 18.3895, "step": 25889 }, { "epoch": 0.47324839600051183, "grad_norm": 6.851732004003136, "learning_rate": 5.674306947693406e-06, "loss": 17.6846, "step": 25890 }, { "epoch": 0.47326667519695836, "grad_norm": 6.713183541630558, "learning_rate": 5.674013639195025e-06, "loss": 17.7856, "step": 25891 }, { "epoch": 0.47328495439340484, "grad_norm": 8.11777700522724, "learning_rate": 5.6737203283342846e-06, "loss": 18.2853, "step": 25892 }, { "epoch": 0.4733032335898514, "grad_norm": 5.807472787390272, "learning_rate": 5.673427015112214e-06, "loss": 17.3082, "step": 25893 }, { "epoch": 0.4733215127862979, "grad_norm": 6.708169580046064, "learning_rate": 5.673133699529841e-06, "loss": 17.7547, "step": 25894 }, { "epoch": 0.47333979198274445, "grad_norm": 5.90589898079177, "learning_rate": 5.67284038158819e-06, "loss": 17.3468, "step": 25895 }, { "epoch": 0.473358071179191, "grad_norm": 6.762469218667008, "learning_rate": 5.672547061288292e-06, "loss": 17.6666, "step": 25896 }, { "epoch": 0.47337635037563747, "grad_norm": 6.57452511513548, "learning_rate": 5.672253738631176e-06, "loss": 17.4593, "step": 25897 }, { "epoch": 0.473394629572084, "grad_norm": 4.857877928913913, "learning_rate": 5.671960413617869e-06, "loss": 16.9493, "step": 25898 }, { "epoch": 0.47341290876853054, "grad_norm": 6.762618836487802, "learning_rate": 5.671667086249399e-06, "loss": 17.3922, "step": 25899 }, { "epoch": 0.4734311879649771, "grad_norm": 7.510592587270578, "learning_rate": 5.671373756526795e-06, "loss": 17.6653, "step": 25900 }, { "epoch": 0.4734494671614236, "grad_norm": 6.607209235175018, "learning_rate": 5.671080424451083e-06, "loss": 17.8866, "step": 25901 }, { "epoch": 0.4734677463578701, "grad_norm": 5.585064724011794, "learning_rate": 5.670787090023293e-06, "loss": 17.0745, "step": 25902 }, { "epoch": 0.4734860255543166, "grad_norm": 6.124320979701702, "learning_rate": 5.670493753244452e-06, "loss": 17.4572, "step": 25903 }, { "epoch": 0.47350430475076316, "grad_norm": 6.563376709963528, "learning_rate": 5.670200414115589e-06, "loss": 17.6055, "step": 25904 }, { "epoch": 0.4735225839472097, "grad_norm": 6.343559243910293, "learning_rate": 5.66990707263773e-06, "loss": 17.398, "step": 25905 }, { "epoch": 0.47354086314365623, "grad_norm": 6.588324721187836, "learning_rate": 5.669613728811907e-06, "loss": 17.7244, "step": 25906 }, { "epoch": 0.4735591423401027, "grad_norm": 7.63767999502871, "learning_rate": 5.669320382639145e-06, "loss": 17.9324, "step": 25907 }, { "epoch": 0.47357742153654925, "grad_norm": 4.932081138185488, "learning_rate": 5.669027034120474e-06, "loss": 16.8443, "step": 25908 }, { "epoch": 0.4735957007329958, "grad_norm": 6.400489033653264, "learning_rate": 5.66873368325692e-06, "loss": 17.6367, "step": 25909 }, { "epoch": 0.4736139799294423, "grad_norm": 5.927089896266275, "learning_rate": 5.6684403300495135e-06, "loss": 17.129, "step": 25910 }, { "epoch": 0.4736322591258888, "grad_norm": 6.13647123586034, "learning_rate": 5.668146974499282e-06, "loss": 17.468, "step": 25911 }, { "epoch": 0.47365053832233533, "grad_norm": 6.247460530699331, "learning_rate": 5.667853616607253e-06, "loss": 17.3167, "step": 25912 }, { "epoch": 0.47366881751878187, "grad_norm": 6.628616623180669, "learning_rate": 5.667560256374455e-06, "loss": 17.5867, "step": 25913 }, { "epoch": 0.4736870967152284, "grad_norm": 6.181169806865533, "learning_rate": 5.667266893801915e-06, "loss": 17.2199, "step": 25914 }, { "epoch": 0.47370537591167494, "grad_norm": 7.61247354414544, "learning_rate": 5.6669735288906634e-06, "loss": 17.9029, "step": 25915 }, { "epoch": 0.4737236551081214, "grad_norm": 7.861975335849096, "learning_rate": 5.666680161641728e-06, "loss": 17.6708, "step": 25916 }, { "epoch": 0.47374193430456796, "grad_norm": 6.866773229149948, "learning_rate": 5.6663867920561354e-06, "loss": 17.6993, "step": 25917 }, { "epoch": 0.4737602135010145, "grad_norm": 8.542882237612853, "learning_rate": 5.666093420134916e-06, "loss": 18.2594, "step": 25918 }, { "epoch": 0.47377849269746103, "grad_norm": 6.402801659901995, "learning_rate": 5.665800045879097e-06, "loss": 17.5634, "step": 25919 }, { "epoch": 0.47379677189390756, "grad_norm": 6.7766241834966525, "learning_rate": 5.665506669289705e-06, "loss": 17.5705, "step": 25920 }, { "epoch": 0.47381505109035404, "grad_norm": 9.444608585705058, "learning_rate": 5.66521329036777e-06, "loss": 18.2186, "step": 25921 }, { "epoch": 0.4738333302868006, "grad_norm": 5.785322703644276, "learning_rate": 5.664919909114322e-06, "loss": 17.4296, "step": 25922 }, { "epoch": 0.4738516094832471, "grad_norm": 6.267172285029286, "learning_rate": 5.664626525530385e-06, "loss": 17.4039, "step": 25923 }, { "epoch": 0.47386988867969365, "grad_norm": 5.974093653947869, "learning_rate": 5.664333139616992e-06, "loss": 17.5757, "step": 25924 }, { "epoch": 0.4738881678761402, "grad_norm": 7.500747167628114, "learning_rate": 5.66403975137517e-06, "loss": 17.7725, "step": 25925 }, { "epoch": 0.47390644707258667, "grad_norm": 5.964027363515949, "learning_rate": 5.663746360805944e-06, "loss": 17.081, "step": 25926 }, { "epoch": 0.4739247262690332, "grad_norm": 6.290251241856698, "learning_rate": 5.663452967910345e-06, "loss": 17.6068, "step": 25927 }, { "epoch": 0.47394300546547974, "grad_norm": 5.208218032142328, "learning_rate": 5.6631595726894e-06, "loss": 17.0189, "step": 25928 }, { "epoch": 0.4739612846619263, "grad_norm": 7.29574622467421, "learning_rate": 5.662866175144139e-06, "loss": 17.9344, "step": 25929 }, { "epoch": 0.4739795638583728, "grad_norm": 5.434198716830011, "learning_rate": 5.662572775275591e-06, "loss": 17.1999, "step": 25930 }, { "epoch": 0.4739978430548193, "grad_norm": 6.895067401429106, "learning_rate": 5.662279373084782e-06, "loss": 18.0767, "step": 25931 }, { "epoch": 0.4740161222512658, "grad_norm": 6.052100713760119, "learning_rate": 5.661985968572741e-06, "loss": 17.3477, "step": 25932 }, { "epoch": 0.47403440144771236, "grad_norm": 7.534649191841099, "learning_rate": 5.6616925617404965e-06, "loss": 17.4309, "step": 25933 }, { "epoch": 0.4740526806441589, "grad_norm": 5.7560926047448335, "learning_rate": 5.661399152589077e-06, "loss": 17.3121, "step": 25934 }, { "epoch": 0.47407095984060543, "grad_norm": 5.212578901397439, "learning_rate": 5.6611057411195115e-06, "loss": 17.0144, "step": 25935 }, { "epoch": 0.4740892390370519, "grad_norm": 6.813388999902722, "learning_rate": 5.660812327332825e-06, "loss": 17.7488, "step": 25936 }, { "epoch": 0.47410751823349845, "grad_norm": 5.903166788230887, "learning_rate": 5.6605189112300515e-06, "loss": 17.2933, "step": 25937 }, { "epoch": 0.474125797429945, "grad_norm": 5.312244703909885, "learning_rate": 5.660225492812216e-06, "loss": 17.2784, "step": 25938 }, { "epoch": 0.4741440766263915, "grad_norm": 7.317861567662505, "learning_rate": 5.659932072080349e-06, "loss": 18.0144, "step": 25939 }, { "epoch": 0.47416235582283806, "grad_norm": 5.920722899051549, "learning_rate": 5.6596386490354745e-06, "loss": 17.3487, "step": 25940 }, { "epoch": 0.47418063501928454, "grad_norm": 6.411628454375291, "learning_rate": 5.659345223678624e-06, "loss": 17.5412, "step": 25941 }, { "epoch": 0.47419891421573107, "grad_norm": 7.406774194138147, "learning_rate": 5.659051796010828e-06, "loss": 18.295, "step": 25942 }, { "epoch": 0.4742171934121776, "grad_norm": 5.98952482355676, "learning_rate": 5.658758366033111e-06, "loss": 17.0943, "step": 25943 }, { "epoch": 0.47423547260862414, "grad_norm": 6.0568664036181366, "learning_rate": 5.6584649337465036e-06, "loss": 17.3889, "step": 25944 }, { "epoch": 0.4742537518050706, "grad_norm": 7.333534487383448, "learning_rate": 5.6581714991520335e-06, "loss": 18.02, "step": 25945 }, { "epoch": 0.47427203100151716, "grad_norm": 5.345981274064981, "learning_rate": 5.657878062250729e-06, "loss": 17.1343, "step": 25946 }, { "epoch": 0.4742903101979637, "grad_norm": 7.045236121429757, "learning_rate": 5.657584623043619e-06, "loss": 17.6542, "step": 25947 }, { "epoch": 0.47430858939441023, "grad_norm": 5.870662473700976, "learning_rate": 5.657291181531732e-06, "loss": 17.3836, "step": 25948 }, { "epoch": 0.47432686859085677, "grad_norm": 6.906669538816162, "learning_rate": 5.6569977377160985e-06, "loss": 17.6594, "step": 25949 }, { "epoch": 0.47434514778730325, "grad_norm": 5.133526592293263, "learning_rate": 5.656704291597742e-06, "loss": 16.8409, "step": 25950 }, { "epoch": 0.4743634269837498, "grad_norm": 6.365411223856171, "learning_rate": 5.656410843177695e-06, "loss": 17.318, "step": 25951 }, { "epoch": 0.4743817061801963, "grad_norm": 5.788346236899196, "learning_rate": 5.656117392456986e-06, "loss": 17.2644, "step": 25952 }, { "epoch": 0.47439998537664285, "grad_norm": 6.593894142725526, "learning_rate": 5.655823939436642e-06, "loss": 17.4617, "step": 25953 }, { "epoch": 0.4744182645730894, "grad_norm": 7.152965042730411, "learning_rate": 5.655530484117691e-06, "loss": 17.8426, "step": 25954 }, { "epoch": 0.47443654376953587, "grad_norm": 5.5782092289664575, "learning_rate": 5.655237026501162e-06, "loss": 17.3581, "step": 25955 }, { "epoch": 0.4744548229659824, "grad_norm": 6.861717928090626, "learning_rate": 5.654943566588087e-06, "loss": 17.9868, "step": 25956 }, { "epoch": 0.47447310216242894, "grad_norm": 6.025530385063021, "learning_rate": 5.65465010437949e-06, "loss": 17.5004, "step": 25957 }, { "epoch": 0.4744913813588755, "grad_norm": 5.756300202233745, "learning_rate": 5.654356639876401e-06, "loss": 16.9075, "step": 25958 }, { "epoch": 0.474509660555322, "grad_norm": 5.906484596240676, "learning_rate": 5.654063173079849e-06, "loss": 17.4008, "step": 25959 }, { "epoch": 0.4745279397517685, "grad_norm": 6.307032854882647, "learning_rate": 5.6537697039908616e-06, "loss": 17.4723, "step": 25960 }, { "epoch": 0.474546218948215, "grad_norm": 7.954707887473592, "learning_rate": 5.653476232610469e-06, "loss": 18.1062, "step": 25961 }, { "epoch": 0.47456449814466156, "grad_norm": 8.200524458621782, "learning_rate": 5.653182758939698e-06, "loss": 18.505, "step": 25962 }, { "epoch": 0.4745827773411081, "grad_norm": 6.6348159359859515, "learning_rate": 5.652889282979579e-06, "loss": 17.3301, "step": 25963 }, { "epoch": 0.47460105653755463, "grad_norm": 5.168494641857537, "learning_rate": 5.652595804731139e-06, "loss": 17.0669, "step": 25964 }, { "epoch": 0.4746193357340011, "grad_norm": 5.690613203882012, "learning_rate": 5.6523023241954076e-06, "loss": 17.1961, "step": 25965 }, { "epoch": 0.47463761493044765, "grad_norm": 6.898315220190865, "learning_rate": 5.652008841373413e-06, "loss": 17.6733, "step": 25966 }, { "epoch": 0.4746558941268942, "grad_norm": 6.814659740351716, "learning_rate": 5.651715356266187e-06, "loss": 17.678, "step": 25967 }, { "epoch": 0.4746741733233407, "grad_norm": 6.526756873649642, "learning_rate": 5.65142186887475e-06, "loss": 17.4209, "step": 25968 }, { "epoch": 0.47469245251978726, "grad_norm": 6.556606698719368, "learning_rate": 5.651128379200139e-06, "loss": 17.5862, "step": 25969 }, { "epoch": 0.47471073171623374, "grad_norm": 5.146666597874986, "learning_rate": 5.650834887243379e-06, "loss": 16.9093, "step": 25970 }, { "epoch": 0.47472901091268027, "grad_norm": 7.757187958135804, "learning_rate": 5.6505413930055e-06, "loss": 17.8769, "step": 25971 }, { "epoch": 0.4747472901091268, "grad_norm": 6.710453664682444, "learning_rate": 5.650247896487528e-06, "loss": 17.8814, "step": 25972 }, { "epoch": 0.47476556930557334, "grad_norm": 7.123010685500772, "learning_rate": 5.649954397690496e-06, "loss": 17.6994, "step": 25973 }, { "epoch": 0.4747838485020199, "grad_norm": 4.969359930302907, "learning_rate": 5.649660896615428e-06, "loss": 16.9672, "step": 25974 }, { "epoch": 0.47480212769846636, "grad_norm": 6.9708902897066185, "learning_rate": 5.6493673932633555e-06, "loss": 17.9988, "step": 25975 }, { "epoch": 0.4748204068949129, "grad_norm": 5.975754995200632, "learning_rate": 5.6490738876353066e-06, "loss": 17.502, "step": 25976 }, { "epoch": 0.47483868609135943, "grad_norm": 5.851913605484984, "learning_rate": 5.64878037973231e-06, "loss": 17.3717, "step": 25977 }, { "epoch": 0.47485696528780597, "grad_norm": 5.6586834549541125, "learning_rate": 5.648486869555395e-06, "loss": 17.2946, "step": 25978 }, { "epoch": 0.47487524448425245, "grad_norm": 7.157862049440766, "learning_rate": 5.64819335710559e-06, "loss": 17.7344, "step": 25979 }, { "epoch": 0.474893523680699, "grad_norm": 8.014626993561148, "learning_rate": 5.647899842383923e-06, "loss": 18.4884, "step": 25980 }, { "epoch": 0.4749118028771455, "grad_norm": 6.046323901293027, "learning_rate": 5.647606325391425e-06, "loss": 17.1595, "step": 25981 }, { "epoch": 0.47493008207359205, "grad_norm": 6.691217919244507, "learning_rate": 5.647312806129119e-06, "loss": 17.7766, "step": 25982 }, { "epoch": 0.4749483612700386, "grad_norm": 6.359084571375429, "learning_rate": 5.647019284598041e-06, "loss": 17.3846, "step": 25983 }, { "epoch": 0.47496664046648507, "grad_norm": 6.427892313531851, "learning_rate": 5.646725760799216e-06, "loss": 17.4413, "step": 25984 }, { "epoch": 0.4749849196629316, "grad_norm": 6.274193033345976, "learning_rate": 5.646432234733674e-06, "loss": 17.6532, "step": 25985 }, { "epoch": 0.47500319885937814, "grad_norm": 6.768050627752301, "learning_rate": 5.646138706402442e-06, "loss": 17.5241, "step": 25986 }, { "epoch": 0.4750214780558247, "grad_norm": 5.819678207070442, "learning_rate": 5.64584517580655e-06, "loss": 17.5962, "step": 25987 }, { "epoch": 0.4750397572522712, "grad_norm": 5.951836988982997, "learning_rate": 5.645551642947027e-06, "loss": 17.3161, "step": 25988 }, { "epoch": 0.4750580364487177, "grad_norm": 6.789371409648295, "learning_rate": 5.6452581078249006e-06, "loss": 17.5762, "step": 25989 }, { "epoch": 0.4750763156451642, "grad_norm": 9.374394706643287, "learning_rate": 5.644964570441202e-06, "loss": 18.3603, "step": 25990 }, { "epoch": 0.47509459484161076, "grad_norm": 7.474719096925224, "learning_rate": 5.644671030796958e-06, "loss": 18.0784, "step": 25991 }, { "epoch": 0.4751128740380573, "grad_norm": 5.8469241580465985, "learning_rate": 5.6443774888931964e-06, "loss": 16.9541, "step": 25992 }, { "epoch": 0.47513115323450383, "grad_norm": 6.610767546635071, "learning_rate": 5.644083944730949e-06, "loss": 17.4141, "step": 25993 }, { "epoch": 0.4751494324309503, "grad_norm": 5.82041686861602, "learning_rate": 5.643790398311244e-06, "loss": 17.0751, "step": 25994 }, { "epoch": 0.47516771162739685, "grad_norm": 6.541205305517173, "learning_rate": 5.64349684963511e-06, "loss": 17.5317, "step": 25995 }, { "epoch": 0.4751859908238434, "grad_norm": 5.705654479397359, "learning_rate": 5.643203298703572e-06, "loss": 17.2416, "step": 25996 }, { "epoch": 0.4752042700202899, "grad_norm": 5.929132218688094, "learning_rate": 5.642909745517665e-06, "loss": 17.0134, "step": 25997 }, { "epoch": 0.47522254921673646, "grad_norm": 6.505326066339509, "learning_rate": 5.6426161900784146e-06, "loss": 17.3633, "step": 25998 }, { "epoch": 0.47524082841318294, "grad_norm": 6.685730152464692, "learning_rate": 5.642322632386851e-06, "loss": 17.5658, "step": 25999 }, { "epoch": 0.47525910760962947, "grad_norm": 6.376676619157217, "learning_rate": 5.642029072443999e-06, "loss": 17.6068, "step": 26000 }, { "epoch": 0.475277386806076, "grad_norm": 7.857717160114505, "learning_rate": 5.641735510250895e-06, "loss": 18.0532, "step": 26001 }, { "epoch": 0.47529566600252254, "grad_norm": 6.084575919193199, "learning_rate": 5.641441945808562e-06, "loss": 17.3179, "step": 26002 }, { "epoch": 0.4753139451989691, "grad_norm": 5.884143566256101, "learning_rate": 5.641148379118031e-06, "loss": 17.3732, "step": 26003 }, { "epoch": 0.47533222439541556, "grad_norm": 7.197660095175341, "learning_rate": 5.64085481018033e-06, "loss": 17.9315, "step": 26004 }, { "epoch": 0.4753505035918621, "grad_norm": 7.2089115755654625, "learning_rate": 5.640561238996489e-06, "loss": 17.9124, "step": 26005 }, { "epoch": 0.47536878278830863, "grad_norm": 6.665101417994158, "learning_rate": 5.640267665567536e-06, "loss": 17.5239, "step": 26006 }, { "epoch": 0.47538706198475517, "grad_norm": 6.039014754649297, "learning_rate": 5.6399740898944995e-06, "loss": 17.5545, "step": 26007 }, { "epoch": 0.4754053411812017, "grad_norm": 5.889816194995486, "learning_rate": 5.6396805119784125e-06, "loss": 17.4227, "step": 26008 }, { "epoch": 0.4754236203776482, "grad_norm": 6.266189007191372, "learning_rate": 5.639386931820298e-06, "loss": 17.5186, "step": 26009 }, { "epoch": 0.4754418995740947, "grad_norm": 5.6734377472976485, "learning_rate": 5.639093349421187e-06, "loss": 17.2666, "step": 26010 }, { "epoch": 0.47546017877054125, "grad_norm": 5.059160481785002, "learning_rate": 5.638799764782113e-06, "loss": 16.9287, "step": 26011 }, { "epoch": 0.4754784579669878, "grad_norm": 7.62269200425769, "learning_rate": 5.6385061779040986e-06, "loss": 17.7419, "step": 26012 }, { "epoch": 0.47549673716343427, "grad_norm": 7.4300084586237745, "learning_rate": 5.638212588788175e-06, "loss": 17.9371, "step": 26013 }, { "epoch": 0.4755150163598808, "grad_norm": 6.190862055253693, "learning_rate": 5.6379189974353724e-06, "loss": 17.4832, "step": 26014 }, { "epoch": 0.47553329555632734, "grad_norm": 6.148301697943547, "learning_rate": 5.63762540384672e-06, "loss": 17.6687, "step": 26015 }, { "epoch": 0.4755515747527739, "grad_norm": 6.783249319787577, "learning_rate": 5.6373318080232455e-06, "loss": 17.8364, "step": 26016 }, { "epoch": 0.4755698539492204, "grad_norm": 9.257873471346558, "learning_rate": 5.637038209965977e-06, "loss": 18.4764, "step": 26017 }, { "epoch": 0.4755881331456669, "grad_norm": 6.158810833202474, "learning_rate": 5.636744609675946e-06, "loss": 17.564, "step": 26018 }, { "epoch": 0.4756064123421134, "grad_norm": 6.401517358591553, "learning_rate": 5.63645100715418e-06, "loss": 17.5254, "step": 26019 }, { "epoch": 0.47562469153855996, "grad_norm": 5.391545148250154, "learning_rate": 5.6361574024017085e-06, "loss": 17.2427, "step": 26020 }, { "epoch": 0.4756429707350065, "grad_norm": 7.675663362478772, "learning_rate": 5.635863795419561e-06, "loss": 18.3336, "step": 26021 }, { "epoch": 0.47566124993145303, "grad_norm": 4.807485419133941, "learning_rate": 5.6355701862087665e-06, "loss": 16.82, "step": 26022 }, { "epoch": 0.4756795291278995, "grad_norm": 6.581052885289084, "learning_rate": 5.635276574770352e-06, "loss": 17.4988, "step": 26023 }, { "epoch": 0.47569780832434605, "grad_norm": 5.08216685605317, "learning_rate": 5.634982961105349e-06, "loss": 17.0414, "step": 26024 }, { "epoch": 0.4757160875207926, "grad_norm": 6.988364270643437, "learning_rate": 5.634689345214787e-06, "loss": 18.0667, "step": 26025 }, { "epoch": 0.4757343667172391, "grad_norm": 6.381569064303758, "learning_rate": 5.634395727099692e-06, "loss": 17.3598, "step": 26026 }, { "epoch": 0.47575264591368566, "grad_norm": 6.380849986204152, "learning_rate": 5.634102106761095e-06, "loss": 17.548, "step": 26027 }, { "epoch": 0.47577092511013214, "grad_norm": 7.404830013781351, "learning_rate": 5.633808484200026e-06, "loss": 17.8752, "step": 26028 }, { "epoch": 0.4757892043065787, "grad_norm": 6.500785528836138, "learning_rate": 5.633514859417514e-06, "loss": 17.56, "step": 26029 }, { "epoch": 0.4758074835030252, "grad_norm": 6.321136405646386, "learning_rate": 5.633221232414587e-06, "loss": 17.3529, "step": 26030 }, { "epoch": 0.47582576269947174, "grad_norm": 5.165881963417881, "learning_rate": 5.632927603192274e-06, "loss": 17.038, "step": 26031 }, { "epoch": 0.4758440418959183, "grad_norm": 6.385283406981002, "learning_rate": 5.632633971751604e-06, "loss": 17.1728, "step": 26032 }, { "epoch": 0.47586232109236476, "grad_norm": 7.2731084059304365, "learning_rate": 5.632340338093608e-06, "loss": 17.9146, "step": 26033 }, { "epoch": 0.4758806002888113, "grad_norm": 6.42716275932108, "learning_rate": 5.632046702219314e-06, "loss": 17.2662, "step": 26034 }, { "epoch": 0.47589887948525783, "grad_norm": 6.9941749540297105, "learning_rate": 5.631753064129751e-06, "loss": 17.4194, "step": 26035 }, { "epoch": 0.47591715868170437, "grad_norm": 6.033991187815937, "learning_rate": 5.631459423825948e-06, "loss": 17.294, "step": 26036 }, { "epoch": 0.4759354378781509, "grad_norm": 5.936301524659878, "learning_rate": 5.631165781308934e-06, "loss": 17.7412, "step": 26037 }, { "epoch": 0.4759537170745974, "grad_norm": 6.1020200600307835, "learning_rate": 5.630872136579739e-06, "loss": 17.3488, "step": 26038 }, { "epoch": 0.4759719962710439, "grad_norm": 6.877421053511342, "learning_rate": 5.630578489639393e-06, "loss": 17.6673, "step": 26039 }, { "epoch": 0.47599027546749045, "grad_norm": 4.975494633060373, "learning_rate": 5.6302848404889245e-06, "loss": 16.8864, "step": 26040 }, { "epoch": 0.476008554663937, "grad_norm": 6.525103130084275, "learning_rate": 5.62999118912936e-06, "loss": 17.4129, "step": 26041 }, { "epoch": 0.4760268338603835, "grad_norm": 5.996496404997624, "learning_rate": 5.629697535561733e-06, "loss": 17.3319, "step": 26042 }, { "epoch": 0.47604511305683, "grad_norm": 7.351909418351181, "learning_rate": 5.62940387978707e-06, "loss": 17.9268, "step": 26043 }, { "epoch": 0.47606339225327654, "grad_norm": 5.526687594872396, "learning_rate": 5.6291102218064005e-06, "loss": 17.299, "step": 26044 }, { "epoch": 0.4760816714497231, "grad_norm": 5.425425747437962, "learning_rate": 5.628816561620755e-06, "loss": 16.9579, "step": 26045 }, { "epoch": 0.4760999506461696, "grad_norm": 6.642975514835074, "learning_rate": 5.628522899231163e-06, "loss": 17.9246, "step": 26046 }, { "epoch": 0.4761182298426161, "grad_norm": 5.958625007699751, "learning_rate": 5.62822923463865e-06, "loss": 17.1009, "step": 26047 }, { "epoch": 0.4761365090390626, "grad_norm": 7.628080431055862, "learning_rate": 5.62793556784425e-06, "loss": 18.0556, "step": 26048 }, { "epoch": 0.47615478823550916, "grad_norm": 5.801207100153378, "learning_rate": 5.6276418988489916e-06, "loss": 17.2965, "step": 26049 }, { "epoch": 0.4761730674319557, "grad_norm": 9.79241349374093, "learning_rate": 5.627348227653899e-06, "loss": 18.6229, "step": 26050 }, { "epoch": 0.47619134662840223, "grad_norm": 6.781462717629474, "learning_rate": 5.627054554260008e-06, "loss": 17.4345, "step": 26051 }, { "epoch": 0.4762096258248487, "grad_norm": 5.812752241444554, "learning_rate": 5.626760878668344e-06, "loss": 17.3331, "step": 26052 }, { "epoch": 0.47622790502129525, "grad_norm": 6.147366690483685, "learning_rate": 5.62646720087994e-06, "loss": 17.7013, "step": 26053 }, { "epoch": 0.4762461842177418, "grad_norm": 5.122933959756373, "learning_rate": 5.626173520895821e-06, "loss": 17.1301, "step": 26054 }, { "epoch": 0.4762644634141883, "grad_norm": 7.401273026911979, "learning_rate": 5.6258798387170165e-06, "loss": 17.7734, "step": 26055 }, { "epoch": 0.47628274261063486, "grad_norm": 4.933972472532199, "learning_rate": 5.6255861543445615e-06, "loss": 16.9932, "step": 26056 }, { "epoch": 0.47630102180708134, "grad_norm": 4.427933087749209, "learning_rate": 5.625292467779479e-06, "loss": 16.7622, "step": 26057 }, { "epoch": 0.4763193010035279, "grad_norm": 5.7662417096621965, "learning_rate": 5.6249987790228e-06, "loss": 17.4404, "step": 26058 }, { "epoch": 0.4763375801999744, "grad_norm": 6.577856752167029, "learning_rate": 5.624705088075555e-06, "loss": 17.7029, "step": 26059 }, { "epoch": 0.47635585939642094, "grad_norm": 6.67163525678445, "learning_rate": 5.624411394938772e-06, "loss": 17.5504, "step": 26060 }, { "epoch": 0.4763741385928675, "grad_norm": 6.360925961932458, "learning_rate": 5.624117699613483e-06, "loss": 17.6593, "step": 26061 }, { "epoch": 0.47639241778931396, "grad_norm": 7.586802719897017, "learning_rate": 5.6238240021007155e-06, "loss": 17.9586, "step": 26062 }, { "epoch": 0.4764106969857605, "grad_norm": 4.899688214664459, "learning_rate": 5.6235303024014975e-06, "loss": 16.7137, "step": 26063 }, { "epoch": 0.47642897618220703, "grad_norm": 7.138013485464661, "learning_rate": 5.623236600516861e-06, "loss": 18.2766, "step": 26064 }, { "epoch": 0.47644725537865357, "grad_norm": 7.056691352433283, "learning_rate": 5.622942896447834e-06, "loss": 17.9555, "step": 26065 }, { "epoch": 0.4764655345751001, "grad_norm": 6.519716635122897, "learning_rate": 5.622649190195446e-06, "loss": 17.7805, "step": 26066 }, { "epoch": 0.4764838137715466, "grad_norm": 6.2039302734864465, "learning_rate": 5.622355481760728e-06, "loss": 17.3564, "step": 26067 }, { "epoch": 0.4765020929679931, "grad_norm": 6.708680452902, "learning_rate": 5.622061771144706e-06, "loss": 17.8124, "step": 26068 }, { "epoch": 0.47652037216443965, "grad_norm": 6.146022834553187, "learning_rate": 5.621768058348413e-06, "loss": 17.3903, "step": 26069 }, { "epoch": 0.4765386513608862, "grad_norm": 7.644073505989182, "learning_rate": 5.621474343372877e-06, "loss": 17.859, "step": 26070 }, { "epoch": 0.4765569305573327, "grad_norm": 6.31873404662728, "learning_rate": 5.621180626219126e-06, "loss": 17.6161, "step": 26071 }, { "epoch": 0.4765752097537792, "grad_norm": 6.857044687447573, "learning_rate": 5.620886906888191e-06, "loss": 17.5395, "step": 26072 }, { "epoch": 0.47659348895022574, "grad_norm": 6.538786504817792, "learning_rate": 5.620593185381102e-06, "loss": 17.6519, "step": 26073 }, { "epoch": 0.4766117681466723, "grad_norm": 6.460891049638645, "learning_rate": 5.6202994616988884e-06, "loss": 17.6651, "step": 26074 }, { "epoch": 0.4766300473431188, "grad_norm": 5.771273994807442, "learning_rate": 5.620005735842577e-06, "loss": 17.2586, "step": 26075 }, { "epoch": 0.47664832653956535, "grad_norm": 6.34127645290423, "learning_rate": 5.6197120078132005e-06, "loss": 17.2723, "step": 26076 }, { "epoch": 0.47666660573601183, "grad_norm": 7.6002476847563525, "learning_rate": 5.619418277611788e-06, "loss": 17.7249, "step": 26077 }, { "epoch": 0.47668488493245836, "grad_norm": 5.6028576690747185, "learning_rate": 5.619124545239366e-06, "loss": 17.1663, "step": 26078 }, { "epoch": 0.4767031641289049, "grad_norm": 6.8009010872796845, "learning_rate": 5.618830810696968e-06, "loss": 17.3903, "step": 26079 }, { "epoch": 0.47672144332535143, "grad_norm": 6.2757260823554475, "learning_rate": 5.6185370739856226e-06, "loss": 17.5399, "step": 26080 }, { "epoch": 0.4767397225217979, "grad_norm": 7.007343328296595, "learning_rate": 5.618243335106357e-06, "loss": 17.8199, "step": 26081 }, { "epoch": 0.47675800171824445, "grad_norm": 7.868802959252958, "learning_rate": 5.6179495940602005e-06, "loss": 17.9974, "step": 26082 }, { "epoch": 0.476776280914691, "grad_norm": 7.3481225366603, "learning_rate": 5.617655850848186e-06, "loss": 18.1017, "step": 26083 }, { "epoch": 0.4767945601111375, "grad_norm": 6.664199757996422, "learning_rate": 5.617362105471342e-06, "loss": 18.04, "step": 26084 }, { "epoch": 0.47681283930758406, "grad_norm": 8.498700739774147, "learning_rate": 5.617068357930697e-06, "loss": 17.8875, "step": 26085 }, { "epoch": 0.47683111850403054, "grad_norm": 6.243252142764387, "learning_rate": 5.61677460822728e-06, "loss": 17.4461, "step": 26086 }, { "epoch": 0.4768493977004771, "grad_norm": 6.937977061291332, "learning_rate": 5.616480856362123e-06, "loss": 17.905, "step": 26087 }, { "epoch": 0.4768676768969236, "grad_norm": 6.865585270736471, "learning_rate": 5.616187102336252e-06, "loss": 17.6613, "step": 26088 }, { "epoch": 0.47688595609337014, "grad_norm": 4.784125178394753, "learning_rate": 5.6158933461507e-06, "loss": 16.9239, "step": 26089 }, { "epoch": 0.4769042352898167, "grad_norm": 6.590838836207453, "learning_rate": 5.615599587806496e-06, "loss": 17.2997, "step": 26090 }, { "epoch": 0.47692251448626316, "grad_norm": 6.671508649687292, "learning_rate": 5.615305827304668e-06, "loss": 17.68, "step": 26091 }, { "epoch": 0.4769407936827097, "grad_norm": 7.76100921275125, "learning_rate": 5.615012064646247e-06, "loss": 18.0952, "step": 26092 }, { "epoch": 0.47695907287915623, "grad_norm": 6.088363358254355, "learning_rate": 5.614718299832262e-06, "loss": 17.33, "step": 26093 }, { "epoch": 0.47697735207560277, "grad_norm": 7.614245279204956, "learning_rate": 5.614424532863743e-06, "loss": 17.8937, "step": 26094 }, { "epoch": 0.4769956312720493, "grad_norm": 6.900345840051787, "learning_rate": 5.614130763741717e-06, "loss": 17.6068, "step": 26095 }, { "epoch": 0.4770139104684958, "grad_norm": 6.964787476654074, "learning_rate": 5.613836992467217e-06, "loss": 17.4893, "step": 26096 }, { "epoch": 0.4770321896649423, "grad_norm": 6.002296108303925, "learning_rate": 5.613543219041273e-06, "loss": 17.4203, "step": 26097 }, { "epoch": 0.47705046886138885, "grad_norm": 6.834447217700692, "learning_rate": 5.613249443464913e-06, "loss": 17.9869, "step": 26098 }, { "epoch": 0.4770687480578354, "grad_norm": 6.104831198554275, "learning_rate": 5.612955665739167e-06, "loss": 17.3504, "step": 26099 }, { "epoch": 0.4770870272542819, "grad_norm": 6.449593406950984, "learning_rate": 5.612661885865063e-06, "loss": 17.4109, "step": 26100 }, { "epoch": 0.4771053064507284, "grad_norm": 7.0224103024352615, "learning_rate": 5.612368103843634e-06, "loss": 17.5904, "step": 26101 }, { "epoch": 0.47712358564717494, "grad_norm": 6.135187132870457, "learning_rate": 5.612074319675907e-06, "loss": 17.4386, "step": 26102 }, { "epoch": 0.4771418648436215, "grad_norm": 5.0770678705615495, "learning_rate": 5.611780533362913e-06, "loss": 17.1951, "step": 26103 }, { "epoch": 0.477160144040068, "grad_norm": 5.778558709472459, "learning_rate": 5.611486744905681e-06, "loss": 17.3517, "step": 26104 }, { "epoch": 0.47717842323651455, "grad_norm": 6.9569184192284546, "learning_rate": 5.611192954305241e-06, "loss": 17.6013, "step": 26105 }, { "epoch": 0.47719670243296103, "grad_norm": 5.554985824878484, "learning_rate": 5.610899161562623e-06, "loss": 17.3215, "step": 26106 }, { "epoch": 0.47721498162940756, "grad_norm": 7.256870840413215, "learning_rate": 5.6106053666788566e-06, "loss": 18.0335, "step": 26107 }, { "epoch": 0.4772332608258541, "grad_norm": 6.578082303262216, "learning_rate": 5.610311569654972e-06, "loss": 17.5233, "step": 26108 }, { "epoch": 0.47725154002230064, "grad_norm": 7.0026002953920905, "learning_rate": 5.610017770491995e-06, "loss": 17.642, "step": 26109 }, { "epoch": 0.47726981921874717, "grad_norm": 8.634285905142995, "learning_rate": 5.609723969190963e-06, "loss": 18.8466, "step": 26110 }, { "epoch": 0.47728809841519365, "grad_norm": 5.591100147596347, "learning_rate": 5.6094301657529e-06, "loss": 16.9774, "step": 26111 }, { "epoch": 0.4773063776116402, "grad_norm": 6.155160576115009, "learning_rate": 5.609136360178837e-06, "loss": 17.5168, "step": 26112 }, { "epoch": 0.4773246568080867, "grad_norm": 7.1575398367811545, "learning_rate": 5.608842552469802e-06, "loss": 17.7671, "step": 26113 }, { "epoch": 0.47734293600453326, "grad_norm": 7.510841169697521, "learning_rate": 5.608548742626827e-06, "loss": 18.2355, "step": 26114 }, { "epoch": 0.47736121520097974, "grad_norm": 5.402852436341276, "learning_rate": 5.608254930650944e-06, "loss": 17.061, "step": 26115 }, { "epoch": 0.4773794943974263, "grad_norm": 5.388457298037819, "learning_rate": 5.607961116543179e-06, "loss": 16.9783, "step": 26116 }, { "epoch": 0.4773977735938728, "grad_norm": 6.750534929649605, "learning_rate": 5.607667300304563e-06, "loss": 17.8664, "step": 26117 }, { "epoch": 0.47741605279031935, "grad_norm": 7.515028893326799, "learning_rate": 5.607373481936126e-06, "loss": 17.9722, "step": 26118 }, { "epoch": 0.4774343319867659, "grad_norm": 6.097286987623301, "learning_rate": 5.607079661438897e-06, "loss": 17.3201, "step": 26119 }, { "epoch": 0.47745261118321236, "grad_norm": 5.582122826765157, "learning_rate": 5.606785838813907e-06, "loss": 17.1418, "step": 26120 }, { "epoch": 0.4774708903796589, "grad_norm": 5.719784680253939, "learning_rate": 5.6064920140621846e-06, "loss": 17.3713, "step": 26121 }, { "epoch": 0.47748916957610543, "grad_norm": 5.744226850913215, "learning_rate": 5.606198187184762e-06, "loss": 17.2014, "step": 26122 }, { "epoch": 0.47750744877255197, "grad_norm": 7.999450040902364, "learning_rate": 5.605904358182666e-06, "loss": 18.2115, "step": 26123 }, { "epoch": 0.4775257279689985, "grad_norm": 6.8892260325088435, "learning_rate": 5.605610527056927e-06, "loss": 18.0052, "step": 26124 }, { "epoch": 0.477544007165445, "grad_norm": 6.857778199194525, "learning_rate": 5.605316693808578e-06, "loss": 17.5765, "step": 26125 }, { "epoch": 0.4775622863618915, "grad_norm": 5.734045895742806, "learning_rate": 5.6050228584386456e-06, "loss": 17.4257, "step": 26126 }, { "epoch": 0.47758056555833805, "grad_norm": 6.357934785959785, "learning_rate": 5.604729020948158e-06, "loss": 17.6335, "step": 26127 }, { "epoch": 0.4775988447547846, "grad_norm": 7.308077097943614, "learning_rate": 5.604435181338151e-06, "loss": 17.7287, "step": 26128 }, { "epoch": 0.4776171239512311, "grad_norm": 8.951314214338932, "learning_rate": 5.6041413396096515e-06, "loss": 17.9758, "step": 26129 }, { "epoch": 0.4776354031476776, "grad_norm": 5.470556974764741, "learning_rate": 5.603847495763687e-06, "loss": 17.1528, "step": 26130 }, { "epoch": 0.47765368234412414, "grad_norm": 6.183881165989675, "learning_rate": 5.60355364980129e-06, "loss": 17.3334, "step": 26131 }, { "epoch": 0.4776719615405707, "grad_norm": 7.066583803623028, "learning_rate": 5.603259801723489e-06, "loss": 17.8592, "step": 26132 }, { "epoch": 0.4776902407370172, "grad_norm": 6.293945421294758, "learning_rate": 5.602965951531316e-06, "loss": 17.4171, "step": 26133 }, { "epoch": 0.47770851993346375, "grad_norm": 8.633406434286142, "learning_rate": 5.6026720992258e-06, "loss": 17.9421, "step": 26134 }, { "epoch": 0.47772679912991023, "grad_norm": 5.846218605556748, "learning_rate": 5.602378244807969e-06, "loss": 17.2044, "step": 26135 }, { "epoch": 0.47774507832635676, "grad_norm": 5.629178104367023, "learning_rate": 5.602084388278856e-06, "loss": 17.2794, "step": 26136 }, { "epoch": 0.4777633575228033, "grad_norm": 6.874864594145727, "learning_rate": 5.601790529639488e-06, "loss": 17.5492, "step": 26137 }, { "epoch": 0.47778163671924984, "grad_norm": 8.096302984276473, "learning_rate": 5.601496668890898e-06, "loss": 17.9595, "step": 26138 }, { "epoch": 0.47779991591569637, "grad_norm": 6.4229967012341795, "learning_rate": 5.601202806034114e-06, "loss": 17.5692, "step": 26139 }, { "epoch": 0.47781819511214285, "grad_norm": 7.275263196865297, "learning_rate": 5.600908941070167e-06, "loss": 17.7041, "step": 26140 }, { "epoch": 0.4778364743085894, "grad_norm": 6.078711589615375, "learning_rate": 5.6006150740000835e-06, "loss": 17.4678, "step": 26141 }, { "epoch": 0.4778547535050359, "grad_norm": 5.8877548028571685, "learning_rate": 5.600321204824899e-06, "loss": 17.2552, "step": 26142 }, { "epoch": 0.47787303270148246, "grad_norm": 6.159760499766907, "learning_rate": 5.60002733354564e-06, "loss": 17.4516, "step": 26143 }, { "epoch": 0.477891311897929, "grad_norm": 6.668883774809692, "learning_rate": 5.5997334601633365e-06, "loss": 17.5266, "step": 26144 }, { "epoch": 0.4779095910943755, "grad_norm": 6.346232467794673, "learning_rate": 5.5994395846790194e-06, "loss": 17.2999, "step": 26145 }, { "epoch": 0.477927870290822, "grad_norm": 6.279314627877693, "learning_rate": 5.599145707093719e-06, "loss": 17.4918, "step": 26146 }, { "epoch": 0.47794614948726855, "grad_norm": 6.055620613960243, "learning_rate": 5.598851827408466e-06, "loss": 17.2214, "step": 26147 }, { "epoch": 0.4779644286837151, "grad_norm": 8.800387241080244, "learning_rate": 5.598557945624288e-06, "loss": 17.9734, "step": 26148 }, { "epoch": 0.47798270788016156, "grad_norm": 5.879055359319588, "learning_rate": 5.598264061742217e-06, "loss": 17.4847, "step": 26149 }, { "epoch": 0.4780009870766081, "grad_norm": 7.840857060702316, "learning_rate": 5.597970175763281e-06, "loss": 17.9681, "step": 26150 }, { "epoch": 0.47801926627305463, "grad_norm": 7.390125956957079, "learning_rate": 5.5976762876885114e-06, "loss": 17.9516, "step": 26151 }, { "epoch": 0.47803754546950117, "grad_norm": 7.886241989784929, "learning_rate": 5.59738239751894e-06, "loss": 18.1274, "step": 26152 }, { "epoch": 0.4780558246659477, "grad_norm": 7.6358127734958305, "learning_rate": 5.597088505255596e-06, "loss": 18.33, "step": 26153 }, { "epoch": 0.4780741038623942, "grad_norm": 5.8727765713036115, "learning_rate": 5.596794610899507e-06, "loss": 17.2083, "step": 26154 }, { "epoch": 0.4780923830588407, "grad_norm": 5.719505322466552, "learning_rate": 5.596500714451703e-06, "loss": 17.1283, "step": 26155 }, { "epoch": 0.47811066225528726, "grad_norm": 5.249643386733936, "learning_rate": 5.596206815913217e-06, "loss": 17.0474, "step": 26156 }, { "epoch": 0.4781289414517338, "grad_norm": 5.9512499632323586, "learning_rate": 5.595912915285079e-06, "loss": 17.1633, "step": 26157 }, { "epoch": 0.4781472206481803, "grad_norm": 5.539861525542652, "learning_rate": 5.595619012568318e-06, "loss": 17.0582, "step": 26158 }, { "epoch": 0.4781654998446268, "grad_norm": 6.746955695472212, "learning_rate": 5.595325107763963e-06, "loss": 17.6486, "step": 26159 }, { "epoch": 0.47818377904107334, "grad_norm": 6.372113047626275, "learning_rate": 5.595031200873045e-06, "loss": 17.3573, "step": 26160 }, { "epoch": 0.4782020582375199, "grad_norm": 5.8737005821916535, "learning_rate": 5.594737291896594e-06, "loss": 17.3061, "step": 26161 }, { "epoch": 0.4782203374339664, "grad_norm": 6.239354686610514, "learning_rate": 5.594443380835642e-06, "loss": 17.6471, "step": 26162 }, { "epoch": 0.47823861663041295, "grad_norm": 6.4618147944618105, "learning_rate": 5.5941494676912165e-06, "loss": 17.5302, "step": 26163 }, { "epoch": 0.47825689582685943, "grad_norm": 6.8845883561397265, "learning_rate": 5.593855552464348e-06, "loss": 17.5376, "step": 26164 }, { "epoch": 0.47827517502330597, "grad_norm": 6.808300698691769, "learning_rate": 5.593561635156068e-06, "loss": 17.4821, "step": 26165 }, { "epoch": 0.4782934542197525, "grad_norm": 5.980524909712036, "learning_rate": 5.593267715767406e-06, "loss": 17.0913, "step": 26166 }, { "epoch": 0.47831173341619904, "grad_norm": 5.126226771227641, "learning_rate": 5.592973794299393e-06, "loss": 17.0242, "step": 26167 }, { "epoch": 0.47833001261264557, "grad_norm": 6.136243881740379, "learning_rate": 5.592679870753057e-06, "loss": 17.3082, "step": 26168 }, { "epoch": 0.47834829180909205, "grad_norm": 6.093848300755434, "learning_rate": 5.59238594512943e-06, "loss": 17.2224, "step": 26169 }, { "epoch": 0.4783665710055386, "grad_norm": 5.297080451154436, "learning_rate": 5.592092017429543e-06, "loss": 17.2881, "step": 26170 }, { "epoch": 0.4783848502019851, "grad_norm": 6.05727917693309, "learning_rate": 5.5917980876544235e-06, "loss": 17.2438, "step": 26171 }, { "epoch": 0.47840312939843166, "grad_norm": 7.863517824378537, "learning_rate": 5.591504155805103e-06, "loss": 18.2442, "step": 26172 }, { "epoch": 0.4784214085948782, "grad_norm": 8.058677058472782, "learning_rate": 5.591210221882611e-06, "loss": 17.993, "step": 26173 }, { "epoch": 0.4784396877913247, "grad_norm": 7.495887265653726, "learning_rate": 5.590916285887979e-06, "loss": 18.1109, "step": 26174 }, { "epoch": 0.4784579669877712, "grad_norm": 6.267948127689959, "learning_rate": 5.590622347822238e-06, "loss": 17.4014, "step": 26175 }, { "epoch": 0.47847624618421775, "grad_norm": 6.949861333830357, "learning_rate": 5.590328407686415e-06, "loss": 17.9126, "step": 26176 }, { "epoch": 0.4784945253806643, "grad_norm": 9.55325244105705, "learning_rate": 5.590034465481544e-06, "loss": 17.4078, "step": 26177 }, { "epoch": 0.4785128045771108, "grad_norm": 7.621456997317617, "learning_rate": 5.589740521208652e-06, "loss": 17.6216, "step": 26178 }, { "epoch": 0.4785310837735573, "grad_norm": 6.386817809825003, "learning_rate": 5.589446574868771e-06, "loss": 17.2495, "step": 26179 }, { "epoch": 0.47854936297000383, "grad_norm": 5.700295189066984, "learning_rate": 5.589152626462933e-06, "loss": 17.2569, "step": 26180 }, { "epoch": 0.47856764216645037, "grad_norm": 5.936096719381702, "learning_rate": 5.588858675992164e-06, "loss": 17.2734, "step": 26181 }, { "epoch": 0.4785859213628969, "grad_norm": 6.098904815202431, "learning_rate": 5.5885647234574946e-06, "loss": 17.3035, "step": 26182 }, { "epoch": 0.4786042005593434, "grad_norm": 7.148858141497193, "learning_rate": 5.588270768859959e-06, "loss": 17.6, "step": 26183 }, { "epoch": 0.4786224797557899, "grad_norm": 6.168080790795707, "learning_rate": 5.587976812200587e-06, "loss": 17.3124, "step": 26184 }, { "epoch": 0.47864075895223646, "grad_norm": 5.496299195925315, "learning_rate": 5.587682853480405e-06, "loss": 17.1208, "step": 26185 }, { "epoch": 0.478659038148683, "grad_norm": 6.916940082572477, "learning_rate": 5.587388892700446e-06, "loss": 17.675, "step": 26186 }, { "epoch": 0.4786773173451295, "grad_norm": 6.065550357992969, "learning_rate": 5.587094929861741e-06, "loss": 17.3024, "step": 26187 }, { "epoch": 0.478695596541576, "grad_norm": 5.804851435310755, "learning_rate": 5.586800964965318e-06, "loss": 17.29, "step": 26188 }, { "epoch": 0.47871387573802254, "grad_norm": 6.033602700131579, "learning_rate": 5.586506998012209e-06, "loss": 17.2692, "step": 26189 }, { "epoch": 0.4787321549344691, "grad_norm": 6.663829582698504, "learning_rate": 5.586213029003443e-06, "loss": 17.7129, "step": 26190 }, { "epoch": 0.4787504341309156, "grad_norm": 6.951364507725383, "learning_rate": 5.5859190579400526e-06, "loss": 17.7884, "step": 26191 }, { "epoch": 0.47876871332736215, "grad_norm": 9.055546153627796, "learning_rate": 5.585625084823066e-06, "loss": 18.6385, "step": 26192 }, { "epoch": 0.47878699252380863, "grad_norm": 6.9846995043625855, "learning_rate": 5.5853311096535145e-06, "loss": 17.5807, "step": 26193 }, { "epoch": 0.47880527172025517, "grad_norm": 6.732692477602404, "learning_rate": 5.585037132432429e-06, "loss": 17.4702, "step": 26194 }, { "epoch": 0.4788235509167017, "grad_norm": 5.727818276585686, "learning_rate": 5.584743153160837e-06, "loss": 17.4588, "step": 26195 }, { "epoch": 0.47884183011314824, "grad_norm": 8.451719454867723, "learning_rate": 5.584449171839772e-06, "loss": 18.1792, "step": 26196 }, { "epoch": 0.4788601093095948, "grad_norm": 6.900812623289093, "learning_rate": 5.584155188470264e-06, "loss": 17.6951, "step": 26197 }, { "epoch": 0.47887838850604125, "grad_norm": 6.976842411357199, "learning_rate": 5.583861203053344e-06, "loss": 17.7989, "step": 26198 }, { "epoch": 0.4788966677024878, "grad_norm": 5.910355358761967, "learning_rate": 5.583567215590039e-06, "loss": 17.2434, "step": 26199 }, { "epoch": 0.4789149468989343, "grad_norm": 6.175191662204462, "learning_rate": 5.583273226081381e-06, "loss": 17.4369, "step": 26200 }, { "epoch": 0.47893322609538086, "grad_norm": 6.404927060162966, "learning_rate": 5.5829792345284025e-06, "loss": 17.2073, "step": 26201 }, { "epoch": 0.4789515052918274, "grad_norm": 7.6688649647097415, "learning_rate": 5.5826852409321316e-06, "loss": 18.2065, "step": 26202 }, { "epoch": 0.4789697844882739, "grad_norm": 7.757099773467878, "learning_rate": 5.5823912452936e-06, "loss": 18.0641, "step": 26203 }, { "epoch": 0.4789880636847204, "grad_norm": 5.8128483224136795, "learning_rate": 5.582097247613838e-06, "loss": 17.2436, "step": 26204 }, { "epoch": 0.47900634288116695, "grad_norm": 6.405755405230303, "learning_rate": 5.581803247893876e-06, "loss": 17.6274, "step": 26205 }, { "epoch": 0.4790246220776135, "grad_norm": 8.570545696432772, "learning_rate": 5.581509246134742e-06, "loss": 18.6291, "step": 26206 }, { "epoch": 0.47904290127406, "grad_norm": 6.156591387375951, "learning_rate": 5.58121524233747e-06, "loss": 17.24, "step": 26207 }, { "epoch": 0.4790611804705065, "grad_norm": 6.623992225506808, "learning_rate": 5.58092123650309e-06, "loss": 17.5872, "step": 26208 }, { "epoch": 0.47907945966695303, "grad_norm": 5.216852011450072, "learning_rate": 5.580627228632629e-06, "loss": 16.8767, "step": 26209 }, { "epoch": 0.47909773886339957, "grad_norm": 7.510683482527872, "learning_rate": 5.580333218727121e-06, "loss": 17.9773, "step": 26210 }, { "epoch": 0.4791160180598461, "grad_norm": 5.75592348346135, "learning_rate": 5.580039206787597e-06, "loss": 17.1502, "step": 26211 }, { "epoch": 0.47913429725629264, "grad_norm": 5.907665321789651, "learning_rate": 5.579745192815085e-06, "loss": 17.0974, "step": 26212 }, { "epoch": 0.4791525764527391, "grad_norm": 6.292182669194808, "learning_rate": 5.579451176810615e-06, "loss": 17.2361, "step": 26213 }, { "epoch": 0.47917085564918566, "grad_norm": 7.204820704388497, "learning_rate": 5.5791571587752195e-06, "loss": 18.0221, "step": 26214 }, { "epoch": 0.4791891348456322, "grad_norm": 5.484157167412335, "learning_rate": 5.578863138709929e-06, "loss": 17.1521, "step": 26215 }, { "epoch": 0.4792074140420787, "grad_norm": 5.744855348578453, "learning_rate": 5.578569116615773e-06, "loss": 17.4221, "step": 26216 }, { "epoch": 0.4792256932385252, "grad_norm": 7.220814378367754, "learning_rate": 5.578275092493783e-06, "loss": 17.7878, "step": 26217 }, { "epoch": 0.47924397243497174, "grad_norm": 6.328796412840449, "learning_rate": 5.577981066344988e-06, "loss": 17.4949, "step": 26218 }, { "epoch": 0.4792622516314183, "grad_norm": 6.5617144161473515, "learning_rate": 5.577687038170421e-06, "loss": 17.2807, "step": 26219 }, { "epoch": 0.4792805308278648, "grad_norm": 6.495003736669981, "learning_rate": 5.5773930079711105e-06, "loss": 17.3757, "step": 26220 }, { "epoch": 0.47929881002431135, "grad_norm": 6.865996755807059, "learning_rate": 5.5770989757480865e-06, "loss": 17.6212, "step": 26221 }, { "epoch": 0.47931708922075783, "grad_norm": 6.020183196247202, "learning_rate": 5.576804941502382e-06, "loss": 17.2654, "step": 26222 }, { "epoch": 0.47933536841720437, "grad_norm": 6.099741301700175, "learning_rate": 5.576510905235025e-06, "loss": 17.3339, "step": 26223 }, { "epoch": 0.4793536476136509, "grad_norm": 5.4067937833161235, "learning_rate": 5.576216866947048e-06, "loss": 17.0633, "step": 26224 }, { "epoch": 0.47937192681009744, "grad_norm": 7.247404031681719, "learning_rate": 5.575922826639483e-06, "loss": 17.6743, "step": 26225 }, { "epoch": 0.479390206006544, "grad_norm": 6.605198334954851, "learning_rate": 5.575628784313356e-06, "loss": 17.6374, "step": 26226 }, { "epoch": 0.47940848520299045, "grad_norm": 7.6240696437546935, "learning_rate": 5.575334739969699e-06, "loss": 18.268, "step": 26227 }, { "epoch": 0.479426764399437, "grad_norm": 7.378431079986668, "learning_rate": 5.5750406936095445e-06, "loss": 17.9383, "step": 26228 }, { "epoch": 0.4794450435958835, "grad_norm": 5.394760685278859, "learning_rate": 5.574746645233924e-06, "loss": 17.1321, "step": 26229 }, { "epoch": 0.47946332279233006, "grad_norm": 5.730823542829997, "learning_rate": 5.574452594843865e-06, "loss": 17.4686, "step": 26230 }, { "epoch": 0.4794816019887766, "grad_norm": 6.081860419800946, "learning_rate": 5.5741585424404e-06, "loss": 17.7798, "step": 26231 }, { "epoch": 0.4794998811852231, "grad_norm": 6.530243472618639, "learning_rate": 5.57386448802456e-06, "loss": 17.6541, "step": 26232 }, { "epoch": 0.4795181603816696, "grad_norm": 6.6227555503740385, "learning_rate": 5.573570431597373e-06, "loss": 17.4864, "step": 26233 }, { "epoch": 0.47953643957811615, "grad_norm": 4.923635154528669, "learning_rate": 5.573276373159872e-06, "loss": 16.9674, "step": 26234 }, { "epoch": 0.4795547187745627, "grad_norm": 5.603616071046356, "learning_rate": 5.572982312713087e-06, "loss": 17.2972, "step": 26235 }, { "epoch": 0.4795729979710092, "grad_norm": 6.893350260820897, "learning_rate": 5.572688250258048e-06, "loss": 17.7171, "step": 26236 }, { "epoch": 0.4795912771674557, "grad_norm": 5.620651305897151, "learning_rate": 5.572394185795787e-06, "loss": 17.3509, "step": 26237 }, { "epoch": 0.47960955636390223, "grad_norm": 6.51119153220209, "learning_rate": 5.572100119327335e-06, "loss": 17.7156, "step": 26238 }, { "epoch": 0.47962783556034877, "grad_norm": 5.026318614404321, "learning_rate": 5.571806050853722e-06, "loss": 17.0207, "step": 26239 }, { "epoch": 0.4796461147567953, "grad_norm": 5.384751585291172, "learning_rate": 5.571511980375977e-06, "loss": 17.1038, "step": 26240 }, { "epoch": 0.47966439395324184, "grad_norm": 6.795563613774651, "learning_rate": 5.5712179078951325e-06, "loss": 17.2896, "step": 26241 }, { "epoch": 0.4796826731496883, "grad_norm": 6.604792047533484, "learning_rate": 5.5709238334122194e-06, "loss": 17.6891, "step": 26242 }, { "epoch": 0.47970095234613486, "grad_norm": 5.920415462727743, "learning_rate": 5.570629756928267e-06, "loss": 17.2459, "step": 26243 }, { "epoch": 0.4797192315425814, "grad_norm": 7.0716277174869555, "learning_rate": 5.570335678444308e-06, "loss": 17.948, "step": 26244 }, { "epoch": 0.47973751073902793, "grad_norm": 7.241241562813814, "learning_rate": 5.57004159796137e-06, "loss": 17.8781, "step": 26245 }, { "epoch": 0.47975578993547446, "grad_norm": 6.244955681893017, "learning_rate": 5.569747515480487e-06, "loss": 17.6744, "step": 26246 }, { "epoch": 0.47977406913192094, "grad_norm": 5.550364666379726, "learning_rate": 5.569453431002687e-06, "loss": 17.098, "step": 26247 }, { "epoch": 0.4797923483283675, "grad_norm": 6.5128953084245085, "learning_rate": 5.569159344529004e-06, "loss": 17.5295, "step": 26248 }, { "epoch": 0.479810627524814, "grad_norm": 6.53858048588471, "learning_rate": 5.568865256060466e-06, "loss": 17.5188, "step": 26249 }, { "epoch": 0.47982890672126055, "grad_norm": 5.346609906075889, "learning_rate": 5.568571165598104e-06, "loss": 17.0783, "step": 26250 }, { "epoch": 0.47984718591770703, "grad_norm": 7.49948746741147, "learning_rate": 5.56827707314295e-06, "loss": 18.0951, "step": 26251 }, { "epoch": 0.47986546511415357, "grad_norm": 9.32404418977323, "learning_rate": 5.567982978696035e-06, "loss": 19.117, "step": 26252 }, { "epoch": 0.4798837443106001, "grad_norm": 6.0266094390333045, "learning_rate": 5.5676888822583884e-06, "loss": 17.4078, "step": 26253 }, { "epoch": 0.47990202350704664, "grad_norm": 6.420340507834126, "learning_rate": 5.567394783831041e-06, "loss": 17.7002, "step": 26254 }, { "epoch": 0.4799203027034932, "grad_norm": 6.72038526407255, "learning_rate": 5.567100683415025e-06, "loss": 17.562, "step": 26255 }, { "epoch": 0.47993858189993965, "grad_norm": 6.125386102652748, "learning_rate": 5.56680658101137e-06, "loss": 17.4068, "step": 26256 }, { "epoch": 0.4799568610963862, "grad_norm": 7.086812921516251, "learning_rate": 5.566512476621106e-06, "loss": 17.5719, "step": 26257 }, { "epoch": 0.4799751402928327, "grad_norm": 5.5893929247853675, "learning_rate": 5.5662183702452665e-06, "loss": 17.3398, "step": 26258 }, { "epoch": 0.47999341948927926, "grad_norm": 5.584706944612587, "learning_rate": 5.5659242618848785e-06, "loss": 16.944, "step": 26259 }, { "epoch": 0.4800116986857258, "grad_norm": 5.61440570770011, "learning_rate": 5.565630151540978e-06, "loss": 17.2652, "step": 26260 }, { "epoch": 0.4800299778821723, "grad_norm": 7.016142058233312, "learning_rate": 5.5653360392145914e-06, "loss": 18.0383, "step": 26261 }, { "epoch": 0.4800482570786188, "grad_norm": 6.160932323324327, "learning_rate": 5.5650419249067514e-06, "loss": 17.2671, "step": 26262 }, { "epoch": 0.48006653627506535, "grad_norm": 8.763301565907854, "learning_rate": 5.564747808618488e-06, "loss": 18.2968, "step": 26263 }, { "epoch": 0.4800848154715119, "grad_norm": 5.798588579926526, "learning_rate": 5.564453690350833e-06, "loss": 17.0508, "step": 26264 }, { "epoch": 0.4801030946679584, "grad_norm": 5.1590241961317655, "learning_rate": 5.564159570104817e-06, "loss": 16.9717, "step": 26265 }, { "epoch": 0.4801213738644049, "grad_norm": 6.837219230012546, "learning_rate": 5.56386544788147e-06, "loss": 17.3461, "step": 26266 }, { "epoch": 0.48013965306085143, "grad_norm": 6.810932047800776, "learning_rate": 5.563571323681825e-06, "loss": 17.9143, "step": 26267 }, { "epoch": 0.48015793225729797, "grad_norm": 5.903412998805857, "learning_rate": 5.5632771975069085e-06, "loss": 17.2265, "step": 26268 }, { "epoch": 0.4801762114537445, "grad_norm": 7.445933863773643, "learning_rate": 5.562983069357757e-06, "loss": 17.8089, "step": 26269 }, { "epoch": 0.48019449065019104, "grad_norm": 5.859780627223239, "learning_rate": 5.562688939235398e-06, "loss": 17.1932, "step": 26270 }, { "epoch": 0.4802127698466375, "grad_norm": 7.763470278693377, "learning_rate": 5.562394807140863e-06, "loss": 17.9588, "step": 26271 }, { "epoch": 0.48023104904308406, "grad_norm": 7.187215510049689, "learning_rate": 5.5621006730751825e-06, "loss": 17.8856, "step": 26272 }, { "epoch": 0.4802493282395306, "grad_norm": 6.174061876007868, "learning_rate": 5.561806537039388e-06, "loss": 17.5122, "step": 26273 }, { "epoch": 0.48026760743597713, "grad_norm": 4.817893880664546, "learning_rate": 5.561512399034511e-06, "loss": 16.8486, "step": 26274 }, { "epoch": 0.48028588663242366, "grad_norm": 5.405630905271116, "learning_rate": 5.5612182590615815e-06, "loss": 17.0686, "step": 26275 }, { "epoch": 0.48030416582887014, "grad_norm": 5.733040557614441, "learning_rate": 5.56092411712163e-06, "loss": 17.444, "step": 26276 }, { "epoch": 0.4803224450253167, "grad_norm": 7.022229118587633, "learning_rate": 5.560629973215688e-06, "loss": 17.797, "step": 26277 }, { "epoch": 0.4803407242217632, "grad_norm": 7.0566013658705575, "learning_rate": 5.5603358273447886e-06, "loss": 17.8137, "step": 26278 }, { "epoch": 0.48035900341820975, "grad_norm": 6.799484733560736, "learning_rate": 5.560041679509959e-06, "loss": 17.6116, "step": 26279 }, { "epoch": 0.4803772826146563, "grad_norm": 4.995210153192633, "learning_rate": 5.559747529712234e-06, "loss": 16.7933, "step": 26280 }, { "epoch": 0.48039556181110277, "grad_norm": 11.211654346671791, "learning_rate": 5.559453377952641e-06, "loss": 17.997, "step": 26281 }, { "epoch": 0.4804138410075493, "grad_norm": 5.8848111032914066, "learning_rate": 5.55915922423221e-06, "loss": 17.2252, "step": 26282 }, { "epoch": 0.48043212020399584, "grad_norm": 5.2625305885375075, "learning_rate": 5.558865068551978e-06, "loss": 17.0976, "step": 26283 }, { "epoch": 0.4804503994004424, "grad_norm": 6.16618166177873, "learning_rate": 5.558570910912971e-06, "loss": 17.6266, "step": 26284 }, { "epoch": 0.48046867859688885, "grad_norm": 6.671207237377224, "learning_rate": 5.558276751316222e-06, "loss": 17.9212, "step": 26285 }, { "epoch": 0.4804869577933354, "grad_norm": 6.5691155807881945, "learning_rate": 5.55798258976276e-06, "loss": 17.658, "step": 26286 }, { "epoch": 0.4805052369897819, "grad_norm": 5.148630452808944, "learning_rate": 5.557688426253619e-06, "loss": 16.9711, "step": 26287 }, { "epoch": 0.48052351618622846, "grad_norm": 6.101104961735287, "learning_rate": 5.557394260789828e-06, "loss": 17.5516, "step": 26288 }, { "epoch": 0.480541795382675, "grad_norm": 6.030352061449858, "learning_rate": 5.557100093372418e-06, "loss": 17.1955, "step": 26289 }, { "epoch": 0.4805600745791215, "grad_norm": 6.568361226260854, "learning_rate": 5.556805924002421e-06, "loss": 17.5732, "step": 26290 }, { "epoch": 0.480578353775568, "grad_norm": 7.811923646920016, "learning_rate": 5.5565117526808675e-06, "loss": 18.1357, "step": 26291 }, { "epoch": 0.48059663297201455, "grad_norm": 6.420016928709315, "learning_rate": 5.556217579408789e-06, "loss": 17.7828, "step": 26292 }, { "epoch": 0.4806149121684611, "grad_norm": 7.340951696991889, "learning_rate": 5.555923404187216e-06, "loss": 18.1184, "step": 26293 }, { "epoch": 0.4806331913649076, "grad_norm": 6.170264613948992, "learning_rate": 5.5556292270171796e-06, "loss": 17.3478, "step": 26294 }, { "epoch": 0.4806514705613541, "grad_norm": 7.3686494293047, "learning_rate": 5.5553350478997105e-06, "loss": 18.1051, "step": 26295 }, { "epoch": 0.48066974975780064, "grad_norm": 6.20315733126409, "learning_rate": 5.55504086683584e-06, "loss": 17.4688, "step": 26296 }, { "epoch": 0.48068802895424717, "grad_norm": 6.279347818895089, "learning_rate": 5.5547466838265995e-06, "loss": 17.2298, "step": 26297 }, { "epoch": 0.4807063081506937, "grad_norm": 7.263587220888498, "learning_rate": 5.554452498873022e-06, "loss": 17.6596, "step": 26298 }, { "epoch": 0.48072458734714024, "grad_norm": 5.879391009640084, "learning_rate": 5.5541583119761345e-06, "loss": 17.3132, "step": 26299 }, { "epoch": 0.4807428665435867, "grad_norm": 5.67632039881883, "learning_rate": 5.55386412313697e-06, "loss": 17.0772, "step": 26300 }, { "epoch": 0.48076114574003326, "grad_norm": 5.4965637625749135, "learning_rate": 5.553569932356561e-06, "loss": 17.4225, "step": 26301 }, { "epoch": 0.4807794249364798, "grad_norm": 7.718166568010055, "learning_rate": 5.553275739635938e-06, "loss": 17.6912, "step": 26302 }, { "epoch": 0.48079770413292633, "grad_norm": 7.331301042963287, "learning_rate": 5.55298154497613e-06, "loss": 17.9998, "step": 26303 }, { "epoch": 0.48081598332937286, "grad_norm": 7.30181352093863, "learning_rate": 5.552687348378171e-06, "loss": 17.9985, "step": 26304 }, { "epoch": 0.48083426252581934, "grad_norm": 6.1782655559328425, "learning_rate": 5.552393149843089e-06, "loss": 17.6233, "step": 26305 }, { "epoch": 0.4808525417222659, "grad_norm": 6.840889730160085, "learning_rate": 5.552098949371918e-06, "loss": 17.8455, "step": 26306 }, { "epoch": 0.4808708209187124, "grad_norm": 6.504445684199956, "learning_rate": 5.551804746965689e-06, "loss": 17.5115, "step": 26307 }, { "epoch": 0.48088910011515895, "grad_norm": 6.329447353544942, "learning_rate": 5.551510542625433e-06, "loss": 17.6087, "step": 26308 }, { "epoch": 0.4809073793116055, "grad_norm": 5.539137613769063, "learning_rate": 5.551216336352176e-06, "loss": 17.0746, "step": 26309 }, { "epoch": 0.48092565850805197, "grad_norm": 6.315593296203523, "learning_rate": 5.550922128146957e-06, "loss": 17.6492, "step": 26310 }, { "epoch": 0.4809439377044985, "grad_norm": 6.286549505811321, "learning_rate": 5.550627918010804e-06, "loss": 17.3756, "step": 26311 }, { "epoch": 0.48096221690094504, "grad_norm": 7.438101299101056, "learning_rate": 5.550333705944747e-06, "loss": 17.9737, "step": 26312 }, { "epoch": 0.4809804960973916, "grad_norm": 5.016704469711039, "learning_rate": 5.550039491949818e-06, "loss": 16.9689, "step": 26313 }, { "epoch": 0.4809987752938381, "grad_norm": 5.832696566777145, "learning_rate": 5.549745276027047e-06, "loss": 17.6528, "step": 26314 }, { "epoch": 0.4810170544902846, "grad_norm": 5.482277567660579, "learning_rate": 5.54945105817747e-06, "loss": 16.9582, "step": 26315 }, { "epoch": 0.4810353336867311, "grad_norm": 6.803837274527475, "learning_rate": 5.5491568384021125e-06, "loss": 17.9034, "step": 26316 }, { "epoch": 0.48105361288317766, "grad_norm": 6.9583309770867885, "learning_rate": 5.548862616702008e-06, "loss": 17.8081, "step": 26317 }, { "epoch": 0.4810718920796242, "grad_norm": 8.33771885405574, "learning_rate": 5.548568393078188e-06, "loss": 18.432, "step": 26318 }, { "epoch": 0.4810901712760707, "grad_norm": 5.976827166786, "learning_rate": 5.548274167531682e-06, "loss": 17.3933, "step": 26319 }, { "epoch": 0.4811084504725172, "grad_norm": 7.0006852407067495, "learning_rate": 5.547979940063524e-06, "loss": 17.5429, "step": 26320 }, { "epoch": 0.48112672966896375, "grad_norm": 5.994212461893175, "learning_rate": 5.547685710674744e-06, "loss": 17.2477, "step": 26321 }, { "epoch": 0.4811450088654103, "grad_norm": 5.40182894286935, "learning_rate": 5.547391479366372e-06, "loss": 17.0529, "step": 26322 }, { "epoch": 0.4811632880618568, "grad_norm": 6.683537961834204, "learning_rate": 5.547097246139441e-06, "loss": 17.5788, "step": 26323 }, { "epoch": 0.4811815672583033, "grad_norm": 6.985561446856629, "learning_rate": 5.546803010994982e-06, "loss": 17.7265, "step": 26324 }, { "epoch": 0.48119984645474984, "grad_norm": 6.762442212134518, "learning_rate": 5.546508773934026e-06, "loss": 17.568, "step": 26325 }, { "epoch": 0.48121812565119637, "grad_norm": 6.166687649428789, "learning_rate": 5.5462145349576046e-06, "loss": 17.3994, "step": 26326 }, { "epoch": 0.4812364048476429, "grad_norm": 5.78866726007588, "learning_rate": 5.545920294066747e-06, "loss": 17.1952, "step": 26327 }, { "epoch": 0.48125468404408944, "grad_norm": 7.8089709936525855, "learning_rate": 5.545626051262486e-06, "loss": 17.8199, "step": 26328 }, { "epoch": 0.4812729632405359, "grad_norm": 5.730696785860018, "learning_rate": 5.545331806545855e-06, "loss": 17.2917, "step": 26329 }, { "epoch": 0.48129124243698246, "grad_norm": 6.010027176161514, "learning_rate": 5.545037559917883e-06, "loss": 17.3959, "step": 26330 }, { "epoch": 0.481309521633429, "grad_norm": 6.660744577614647, "learning_rate": 5.5447433113796e-06, "loss": 17.3296, "step": 26331 }, { "epoch": 0.48132780082987553, "grad_norm": 6.7999284135408775, "learning_rate": 5.54444906093204e-06, "loss": 17.5348, "step": 26332 }, { "epoch": 0.48134608002632207, "grad_norm": 6.717869793306183, "learning_rate": 5.544154808576235e-06, "loss": 17.8168, "step": 26333 }, { "epoch": 0.48136435922276855, "grad_norm": 7.7918479956045354, "learning_rate": 5.543860554313212e-06, "loss": 17.5697, "step": 26334 }, { "epoch": 0.4813826384192151, "grad_norm": 5.3385532128671915, "learning_rate": 5.543566298144005e-06, "loss": 17.0516, "step": 26335 }, { "epoch": 0.4814009176156616, "grad_norm": 5.446983480464271, "learning_rate": 5.543272040069646e-06, "loss": 17.1035, "step": 26336 }, { "epoch": 0.48141919681210815, "grad_norm": 7.361994189029218, "learning_rate": 5.542977780091166e-06, "loss": 17.4346, "step": 26337 }, { "epoch": 0.4814374760085547, "grad_norm": 5.831175749993607, "learning_rate": 5.542683518209596e-06, "loss": 17.2801, "step": 26338 }, { "epoch": 0.48145575520500117, "grad_norm": 5.320647097754033, "learning_rate": 5.5423892544259685e-06, "loss": 17.0893, "step": 26339 }, { "epoch": 0.4814740344014477, "grad_norm": 4.948055325539974, "learning_rate": 5.542094988741311e-06, "loss": 16.8273, "step": 26340 }, { "epoch": 0.48149231359789424, "grad_norm": 6.744933085245579, "learning_rate": 5.541800721156658e-06, "loss": 17.8702, "step": 26341 }, { "epoch": 0.4815105927943408, "grad_norm": 6.903769133505602, "learning_rate": 5.541506451673043e-06, "loss": 17.9442, "step": 26342 }, { "epoch": 0.4815288719907873, "grad_norm": 5.061867908918671, "learning_rate": 5.541212180291493e-06, "loss": 17.1038, "step": 26343 }, { "epoch": 0.4815471511872338, "grad_norm": 7.448512656050445, "learning_rate": 5.540917907013041e-06, "loss": 18.2131, "step": 26344 }, { "epoch": 0.4815654303836803, "grad_norm": 6.354973975214358, "learning_rate": 5.54062363183872e-06, "loss": 17.4036, "step": 26345 }, { "epoch": 0.48158370958012686, "grad_norm": 6.487779165859003, "learning_rate": 5.540329354769559e-06, "loss": 17.4813, "step": 26346 }, { "epoch": 0.4816019887765734, "grad_norm": 6.461643148634538, "learning_rate": 5.540035075806591e-06, "loss": 17.2318, "step": 26347 }, { "epoch": 0.48162026797301993, "grad_norm": 6.7665501526774685, "learning_rate": 5.539740794950846e-06, "loss": 17.7662, "step": 26348 }, { "epoch": 0.4816385471694664, "grad_norm": 7.327919960970281, "learning_rate": 5.539446512203358e-06, "loss": 17.7267, "step": 26349 }, { "epoch": 0.48165682636591295, "grad_norm": 7.199502002435817, "learning_rate": 5.5391522275651555e-06, "loss": 17.7265, "step": 26350 }, { "epoch": 0.4816751055623595, "grad_norm": 6.1694956664444325, "learning_rate": 5.538857941037272e-06, "loss": 17.4255, "step": 26351 }, { "epoch": 0.481693384758806, "grad_norm": 7.261349513143847, "learning_rate": 5.538563652620738e-06, "loss": 18.0629, "step": 26352 }, { "epoch": 0.4817116639552525, "grad_norm": 7.2562196600869715, "learning_rate": 5.538269362316585e-06, "loss": 17.7093, "step": 26353 }, { "epoch": 0.48172994315169904, "grad_norm": 7.815595500800447, "learning_rate": 5.537975070125844e-06, "loss": 18.2068, "step": 26354 }, { "epoch": 0.48174822234814557, "grad_norm": 8.912954167386891, "learning_rate": 5.537680776049547e-06, "loss": 17.9742, "step": 26355 }, { "epoch": 0.4817665015445921, "grad_norm": 5.379099899561392, "learning_rate": 5.537386480088728e-06, "loss": 17.3032, "step": 26356 }, { "epoch": 0.48178478074103864, "grad_norm": 5.833575098529954, "learning_rate": 5.537092182244414e-06, "loss": 17.1715, "step": 26357 }, { "epoch": 0.4818030599374851, "grad_norm": 5.944524551931755, "learning_rate": 5.536797882517639e-06, "loss": 17.4551, "step": 26358 }, { "epoch": 0.48182133913393166, "grad_norm": 4.406158916859859, "learning_rate": 5.5365035809094315e-06, "loss": 16.7808, "step": 26359 }, { "epoch": 0.4818396183303782, "grad_norm": 6.482350089546045, "learning_rate": 5.536209277420829e-06, "loss": 17.5317, "step": 26360 }, { "epoch": 0.48185789752682473, "grad_norm": 8.447406297336544, "learning_rate": 5.5359149720528586e-06, "loss": 17.653, "step": 26361 }, { "epoch": 0.48187617672327127, "grad_norm": 7.80788670441757, "learning_rate": 5.535620664806551e-06, "loss": 18.283, "step": 26362 }, { "epoch": 0.48189445591971775, "grad_norm": 5.911683566350858, "learning_rate": 5.535326355682942e-06, "loss": 17.5112, "step": 26363 }, { "epoch": 0.4819127351161643, "grad_norm": 6.646164979700029, "learning_rate": 5.5350320446830585e-06, "loss": 17.9519, "step": 26364 }, { "epoch": 0.4819310143126108, "grad_norm": 5.971546840920929, "learning_rate": 5.534737731807935e-06, "loss": 17.2452, "step": 26365 }, { "epoch": 0.48194929350905735, "grad_norm": 6.922578573049142, "learning_rate": 5.534443417058602e-06, "loss": 17.9595, "step": 26366 }, { "epoch": 0.4819675727055039, "grad_norm": 7.729067852830623, "learning_rate": 5.534149100436092e-06, "loss": 17.5947, "step": 26367 }, { "epoch": 0.48198585190195037, "grad_norm": 5.921488170399348, "learning_rate": 5.533854781941435e-06, "loss": 17.4843, "step": 26368 }, { "epoch": 0.4820041310983969, "grad_norm": 6.099905150420779, "learning_rate": 5.533560461575663e-06, "loss": 17.4936, "step": 26369 }, { "epoch": 0.48202241029484344, "grad_norm": 5.2078788116215, "learning_rate": 5.533266139339809e-06, "loss": 17.0594, "step": 26370 }, { "epoch": 0.48204068949129, "grad_norm": 5.344458387455696, "learning_rate": 5.5329718152349036e-06, "loss": 17.1222, "step": 26371 }, { "epoch": 0.4820589686877365, "grad_norm": 5.759422879294768, "learning_rate": 5.532677489261976e-06, "loss": 17.1989, "step": 26372 }, { "epoch": 0.482077247884183, "grad_norm": 6.27210077383447, "learning_rate": 5.532383161422061e-06, "loss": 17.4011, "step": 26373 }, { "epoch": 0.4820955270806295, "grad_norm": 6.975288594122639, "learning_rate": 5.532088831716191e-06, "loss": 17.5764, "step": 26374 }, { "epoch": 0.48211380627707606, "grad_norm": 7.2013189700389875, "learning_rate": 5.531794500145394e-06, "loss": 17.7035, "step": 26375 }, { "epoch": 0.4821320854735226, "grad_norm": 5.93673221939514, "learning_rate": 5.531500166710704e-06, "loss": 17.3782, "step": 26376 }, { "epoch": 0.48215036466996913, "grad_norm": 6.2951549386612635, "learning_rate": 5.5312058314131515e-06, "loss": 17.537, "step": 26377 }, { "epoch": 0.4821686438664156, "grad_norm": 5.095930937214379, "learning_rate": 5.530911494253769e-06, "loss": 17.0357, "step": 26378 }, { "epoch": 0.48218692306286215, "grad_norm": 6.595069307076094, "learning_rate": 5.530617155233588e-06, "loss": 17.6362, "step": 26379 }, { "epoch": 0.4822052022593087, "grad_norm": 5.227657714040226, "learning_rate": 5.530322814353641e-06, "loss": 16.9308, "step": 26380 }, { "epoch": 0.4822234814557552, "grad_norm": 5.635523305027515, "learning_rate": 5.530028471614955e-06, "loss": 17.263, "step": 26381 }, { "epoch": 0.48224176065220176, "grad_norm": 5.44696137512229, "learning_rate": 5.529734127018568e-06, "loss": 17.0705, "step": 26382 }, { "epoch": 0.48226003984864824, "grad_norm": 5.997484849082562, "learning_rate": 5.529439780565509e-06, "loss": 17.4066, "step": 26383 }, { "epoch": 0.48227831904509477, "grad_norm": 6.628561715389882, "learning_rate": 5.529145432256809e-06, "loss": 17.7799, "step": 26384 }, { "epoch": 0.4822965982415413, "grad_norm": 6.430481647033819, "learning_rate": 5.5288510820935005e-06, "loss": 17.3636, "step": 26385 }, { "epoch": 0.48231487743798784, "grad_norm": 7.03515482701866, "learning_rate": 5.528556730076613e-06, "loss": 17.7708, "step": 26386 }, { "epoch": 0.4823331566344343, "grad_norm": 6.0235610521984135, "learning_rate": 5.528262376207182e-06, "loss": 17.2265, "step": 26387 }, { "epoch": 0.48235143583088086, "grad_norm": 6.569044577328092, "learning_rate": 5.527968020486237e-06, "loss": 17.2592, "step": 26388 }, { "epoch": 0.4823697150273274, "grad_norm": 6.5548474387297855, "learning_rate": 5.52767366291481e-06, "loss": 17.3231, "step": 26389 }, { "epoch": 0.48238799422377393, "grad_norm": 6.309354915714875, "learning_rate": 5.527379303493932e-06, "loss": 17.197, "step": 26390 }, { "epoch": 0.48240627342022047, "grad_norm": 6.195226704317913, "learning_rate": 5.527084942224635e-06, "loss": 17.8131, "step": 26391 }, { "epoch": 0.48242455261666695, "grad_norm": 5.912727609028651, "learning_rate": 5.526790579107951e-06, "loss": 17.1175, "step": 26392 }, { "epoch": 0.4824428318131135, "grad_norm": 7.107358197025265, "learning_rate": 5.526496214144912e-06, "loss": 17.9344, "step": 26393 }, { "epoch": 0.48246111100956, "grad_norm": 7.684102386305336, "learning_rate": 5.526201847336551e-06, "loss": 17.9892, "step": 26394 }, { "epoch": 0.48247939020600655, "grad_norm": 6.474374355161299, "learning_rate": 5.525907478683895e-06, "loss": 17.4138, "step": 26395 }, { "epoch": 0.4824976694024531, "grad_norm": 5.991150146906878, "learning_rate": 5.525613108187982e-06, "loss": 17.1133, "step": 26396 }, { "epoch": 0.48251594859889957, "grad_norm": 7.153046771873008, "learning_rate": 5.5253187358498385e-06, "loss": 17.7382, "step": 26397 }, { "epoch": 0.4825342277953461, "grad_norm": 5.927026542319743, "learning_rate": 5.5250243616705005e-06, "loss": 17.0766, "step": 26398 }, { "epoch": 0.48255250699179264, "grad_norm": 6.171271561810203, "learning_rate": 5.524729985650996e-06, "loss": 17.6482, "step": 26399 }, { "epoch": 0.4825707861882392, "grad_norm": 6.893844430349866, "learning_rate": 5.524435607792358e-06, "loss": 17.6515, "step": 26400 }, { "epoch": 0.4825890653846857, "grad_norm": 7.439381565053576, "learning_rate": 5.524141228095621e-06, "loss": 17.8956, "step": 26401 }, { "epoch": 0.4826073445811322, "grad_norm": 7.373664106598826, "learning_rate": 5.5238468465618135e-06, "loss": 17.9442, "step": 26402 }, { "epoch": 0.4826256237775787, "grad_norm": 6.9939627799868695, "learning_rate": 5.5235524631919664e-06, "loss": 17.7618, "step": 26403 }, { "epoch": 0.48264390297402526, "grad_norm": 6.217449746692742, "learning_rate": 5.523258077987116e-06, "loss": 17.2366, "step": 26404 }, { "epoch": 0.4826621821704718, "grad_norm": 6.475407706996417, "learning_rate": 5.5229636909482895e-06, "loss": 17.453, "step": 26405 }, { "epoch": 0.48268046136691833, "grad_norm": 6.771248412325351, "learning_rate": 5.522669302076522e-06, "loss": 17.6682, "step": 26406 }, { "epoch": 0.4826987405633648, "grad_norm": 7.932638380024133, "learning_rate": 5.522374911372843e-06, "loss": 17.7972, "step": 26407 }, { "epoch": 0.48271701975981135, "grad_norm": 5.398758511775816, "learning_rate": 5.522080518838286e-06, "loss": 17.0619, "step": 26408 }, { "epoch": 0.4827352989562579, "grad_norm": 7.134467064725874, "learning_rate": 5.521786124473881e-06, "loss": 17.676, "step": 26409 }, { "epoch": 0.4827535781527044, "grad_norm": 5.467138954842728, "learning_rate": 5.521491728280661e-06, "loss": 17.079, "step": 26410 }, { "epoch": 0.48277185734915096, "grad_norm": 7.448593383359493, "learning_rate": 5.52119733025966e-06, "loss": 17.7377, "step": 26411 }, { "epoch": 0.48279013654559744, "grad_norm": 6.706712332785757, "learning_rate": 5.5209029304119055e-06, "loss": 17.542, "step": 26412 }, { "epoch": 0.482808415742044, "grad_norm": 8.079782595318413, "learning_rate": 5.520608528738431e-06, "loss": 18.13, "step": 26413 }, { "epoch": 0.4828266949384905, "grad_norm": 6.180117578392903, "learning_rate": 5.520314125240269e-06, "loss": 17.2135, "step": 26414 }, { "epoch": 0.48284497413493704, "grad_norm": 6.413793734003815, "learning_rate": 5.520019719918454e-06, "loss": 17.4822, "step": 26415 }, { "epoch": 0.4828632533313836, "grad_norm": 5.416052383810696, "learning_rate": 5.519725312774012e-06, "loss": 16.9619, "step": 26416 }, { "epoch": 0.48288153252783006, "grad_norm": 8.515404032131901, "learning_rate": 5.519430903807979e-06, "loss": 18.2006, "step": 26417 }, { "epoch": 0.4828998117242766, "grad_norm": 6.718324835533906, "learning_rate": 5.519136493021385e-06, "loss": 17.7451, "step": 26418 }, { "epoch": 0.48291809092072313, "grad_norm": 7.557157321472496, "learning_rate": 5.518842080415263e-06, "loss": 18.0379, "step": 26419 }, { "epoch": 0.48293637011716967, "grad_norm": 5.491356103339672, "learning_rate": 5.518547665990644e-06, "loss": 16.8837, "step": 26420 }, { "epoch": 0.48295464931361615, "grad_norm": 6.039783839028251, "learning_rate": 5.518253249748562e-06, "loss": 17.4438, "step": 26421 }, { "epoch": 0.4829729285100627, "grad_norm": 7.234507615296453, "learning_rate": 5.517958831690047e-06, "loss": 17.5323, "step": 26422 }, { "epoch": 0.4829912077065092, "grad_norm": 7.147886885993875, "learning_rate": 5.517664411816129e-06, "loss": 17.9488, "step": 26423 }, { "epoch": 0.48300948690295575, "grad_norm": 6.7792975085443254, "learning_rate": 5.517369990127844e-06, "loss": 17.9162, "step": 26424 }, { "epoch": 0.4830277660994023, "grad_norm": 6.366299688558848, "learning_rate": 5.517075566626223e-06, "loss": 17.3968, "step": 26425 }, { "epoch": 0.48304604529584877, "grad_norm": 6.671544908143495, "learning_rate": 5.516781141312296e-06, "loss": 17.6329, "step": 26426 }, { "epoch": 0.4830643244922953, "grad_norm": 6.147532588853179, "learning_rate": 5.516486714187095e-06, "loss": 17.3035, "step": 26427 }, { "epoch": 0.48308260368874184, "grad_norm": 6.5213750406529005, "learning_rate": 5.516192285251654e-06, "loss": 17.7173, "step": 26428 }, { "epoch": 0.4831008828851884, "grad_norm": 6.299153225964926, "learning_rate": 5.515897854507004e-06, "loss": 17.5168, "step": 26429 }, { "epoch": 0.4831191620816349, "grad_norm": 6.884270250099057, "learning_rate": 5.5156034219541765e-06, "loss": 17.6303, "step": 26430 }, { "epoch": 0.4831374412780814, "grad_norm": 6.791941452825686, "learning_rate": 5.515308987594204e-06, "loss": 17.6293, "step": 26431 }, { "epoch": 0.4831557204745279, "grad_norm": 5.425015410045171, "learning_rate": 5.515014551428117e-06, "loss": 16.9689, "step": 26432 }, { "epoch": 0.48317399967097446, "grad_norm": 6.327286698870973, "learning_rate": 5.514720113456949e-06, "loss": 17.4335, "step": 26433 }, { "epoch": 0.483192278867421, "grad_norm": 5.751877844138184, "learning_rate": 5.514425673681732e-06, "loss": 17.1089, "step": 26434 }, { "epoch": 0.48321055806386753, "grad_norm": 7.826855071819306, "learning_rate": 5.514131232103498e-06, "loss": 17.887, "step": 26435 }, { "epoch": 0.483228837260314, "grad_norm": 5.554974774813623, "learning_rate": 5.513836788723279e-06, "loss": 17.195, "step": 26436 }, { "epoch": 0.48324711645676055, "grad_norm": 6.460077041217687, "learning_rate": 5.513542343542105e-06, "loss": 17.619, "step": 26437 }, { "epoch": 0.4832653956532071, "grad_norm": 6.559002295310229, "learning_rate": 5.513247896561011e-06, "loss": 17.7913, "step": 26438 }, { "epoch": 0.4832836748496536, "grad_norm": 6.33195986367585, "learning_rate": 5.5129534477810285e-06, "loss": 17.4724, "step": 26439 }, { "epoch": 0.48330195404610016, "grad_norm": 5.912952227299503, "learning_rate": 5.512658997203187e-06, "loss": 17.3697, "step": 26440 }, { "epoch": 0.48332023324254664, "grad_norm": 5.983003719290201, "learning_rate": 5.51236454482852e-06, "loss": 17.4935, "step": 26441 }, { "epoch": 0.4833385124389932, "grad_norm": 6.084286934451618, "learning_rate": 5.5120700906580614e-06, "loss": 17.2255, "step": 26442 }, { "epoch": 0.4833567916354397, "grad_norm": 5.918171952761315, "learning_rate": 5.5117756346928406e-06, "loss": 17.3408, "step": 26443 }, { "epoch": 0.48337507083188624, "grad_norm": 7.875499809776562, "learning_rate": 5.51148117693389e-06, "loss": 17.8208, "step": 26444 }, { "epoch": 0.4833933500283328, "grad_norm": 5.9383213103735315, "learning_rate": 5.511186717382244e-06, "loss": 17.3032, "step": 26445 }, { "epoch": 0.48341162922477926, "grad_norm": 9.484592910129143, "learning_rate": 5.510892256038932e-06, "loss": 18.4924, "step": 26446 }, { "epoch": 0.4834299084212258, "grad_norm": 6.985124629907305, "learning_rate": 5.510597792904987e-06, "loss": 17.5429, "step": 26447 }, { "epoch": 0.48344818761767233, "grad_norm": 5.5846440055655755, "learning_rate": 5.51030332798144e-06, "loss": 17.1491, "step": 26448 }, { "epoch": 0.48346646681411887, "grad_norm": 5.508266345734633, "learning_rate": 5.510008861269325e-06, "loss": 17.0911, "step": 26449 }, { "epoch": 0.4834847460105654, "grad_norm": 6.573095767583956, "learning_rate": 5.509714392769674e-06, "loss": 17.2324, "step": 26450 }, { "epoch": 0.4835030252070119, "grad_norm": 7.010458442249363, "learning_rate": 5.509419922483516e-06, "loss": 17.8, "step": 26451 }, { "epoch": 0.4835213044034584, "grad_norm": 6.951120102039227, "learning_rate": 5.509125450411888e-06, "loss": 17.402, "step": 26452 }, { "epoch": 0.48353958359990495, "grad_norm": 6.355362812699377, "learning_rate": 5.508830976555819e-06, "loss": 17.0563, "step": 26453 }, { "epoch": 0.4835578627963515, "grad_norm": 6.308735858351048, "learning_rate": 5.5085365009163394e-06, "loss": 17.3058, "step": 26454 }, { "epoch": 0.48357614199279797, "grad_norm": 6.79619632781194, "learning_rate": 5.508242023494486e-06, "loss": 17.7997, "step": 26455 }, { "epoch": 0.4835944211892445, "grad_norm": 6.67685624025841, "learning_rate": 5.507947544291288e-06, "loss": 17.1824, "step": 26456 }, { "epoch": 0.48361270038569104, "grad_norm": 7.292173648165463, "learning_rate": 5.507653063307777e-06, "loss": 17.9333, "step": 26457 }, { "epoch": 0.4836309795821376, "grad_norm": 6.360337230182274, "learning_rate": 5.507358580544986e-06, "loss": 17.4501, "step": 26458 }, { "epoch": 0.4836492587785841, "grad_norm": 8.908950701616417, "learning_rate": 5.507064096003947e-06, "loss": 18.7293, "step": 26459 }, { "epoch": 0.4836675379750306, "grad_norm": 6.081792855704345, "learning_rate": 5.506769609685694e-06, "loss": 17.4807, "step": 26460 }, { "epoch": 0.48368581717147713, "grad_norm": 5.310649674090544, "learning_rate": 5.506475121591256e-06, "loss": 16.9886, "step": 26461 }, { "epoch": 0.48370409636792366, "grad_norm": 6.463605555093732, "learning_rate": 5.5061806317216675e-06, "loss": 17.5928, "step": 26462 }, { "epoch": 0.4837223755643702, "grad_norm": 6.645141703076254, "learning_rate": 5.505886140077959e-06, "loss": 17.6156, "step": 26463 }, { "epoch": 0.48374065476081674, "grad_norm": 7.919740673887846, "learning_rate": 5.505591646661163e-06, "loss": 17.6663, "step": 26464 }, { "epoch": 0.4837589339572632, "grad_norm": 6.380795144980014, "learning_rate": 5.505297151472314e-06, "loss": 17.497, "step": 26465 }, { "epoch": 0.48377721315370975, "grad_norm": 5.354570423059798, "learning_rate": 5.505002654512442e-06, "loss": 17.2725, "step": 26466 }, { "epoch": 0.4837954923501563, "grad_norm": 7.920450163534305, "learning_rate": 5.504708155782579e-06, "loss": 18.361, "step": 26467 }, { "epoch": 0.4838137715466028, "grad_norm": 6.797506938434176, "learning_rate": 5.504413655283757e-06, "loss": 17.4863, "step": 26468 }, { "epoch": 0.48383205074304936, "grad_norm": 6.1863490212084455, "learning_rate": 5.504119153017009e-06, "loss": 17.4748, "step": 26469 }, { "epoch": 0.48385032993949584, "grad_norm": 9.94859428382597, "learning_rate": 5.503824648983369e-06, "loss": 18.8714, "step": 26470 }, { "epoch": 0.4838686091359424, "grad_norm": 5.984985549829618, "learning_rate": 5.503530143183865e-06, "loss": 17.3481, "step": 26471 }, { "epoch": 0.4838868883323889, "grad_norm": 7.570580355804501, "learning_rate": 5.5032356356195325e-06, "loss": 18.2996, "step": 26472 }, { "epoch": 0.48390516752883544, "grad_norm": 8.095710540796654, "learning_rate": 5.502941126291402e-06, "loss": 17.7676, "step": 26473 }, { "epoch": 0.483923446725282, "grad_norm": 6.442904414187398, "learning_rate": 5.502646615200509e-06, "loss": 17.41, "step": 26474 }, { "epoch": 0.48394172592172846, "grad_norm": 5.8999603205240385, "learning_rate": 5.502352102347881e-06, "loss": 17.0339, "step": 26475 }, { "epoch": 0.483960005118175, "grad_norm": 7.594679173460728, "learning_rate": 5.502057587734553e-06, "loss": 17.767, "step": 26476 }, { "epoch": 0.48397828431462153, "grad_norm": 6.339421754426708, "learning_rate": 5.501763071361557e-06, "loss": 17.576, "step": 26477 }, { "epoch": 0.48399656351106807, "grad_norm": 5.493731792588541, "learning_rate": 5.501468553229924e-06, "loss": 17.3342, "step": 26478 }, { "epoch": 0.4840148427075146, "grad_norm": 5.890005557295245, "learning_rate": 5.501174033340687e-06, "loss": 17.1531, "step": 26479 }, { "epoch": 0.4840331219039611, "grad_norm": 8.516081426871658, "learning_rate": 5.500879511694881e-06, "loss": 18.3053, "step": 26480 }, { "epoch": 0.4840514011004076, "grad_norm": 6.46748852795383, "learning_rate": 5.500584988293534e-06, "loss": 17.4105, "step": 26481 }, { "epoch": 0.48406968029685415, "grad_norm": 6.58473814722474, "learning_rate": 5.50029046313768e-06, "loss": 17.5878, "step": 26482 }, { "epoch": 0.4840879594933007, "grad_norm": 5.820451167813538, "learning_rate": 5.49999593622835e-06, "loss": 17.3362, "step": 26483 }, { "epoch": 0.4841062386897472, "grad_norm": 6.398707813036087, "learning_rate": 5.499701407566581e-06, "loss": 17.45, "step": 26484 }, { "epoch": 0.4841245178861937, "grad_norm": 6.657717437355664, "learning_rate": 5.499406877153401e-06, "loss": 17.7112, "step": 26485 }, { "epoch": 0.48414279708264024, "grad_norm": 5.654215096790829, "learning_rate": 5.49911234498984e-06, "loss": 17.0652, "step": 26486 }, { "epoch": 0.4841610762790868, "grad_norm": 6.9519255804161, "learning_rate": 5.498817811076938e-06, "loss": 17.8553, "step": 26487 }, { "epoch": 0.4841793554755333, "grad_norm": 6.242247399787906, "learning_rate": 5.49852327541572e-06, "loss": 17.5432, "step": 26488 }, { "epoch": 0.4841976346719798, "grad_norm": 6.713974957489091, "learning_rate": 5.498228738007222e-06, "loss": 17.3966, "step": 26489 }, { "epoch": 0.48421591386842633, "grad_norm": 6.439268771521275, "learning_rate": 5.497934198852475e-06, "loss": 17.7379, "step": 26490 }, { "epoch": 0.48423419306487286, "grad_norm": 6.713643240167457, "learning_rate": 5.497639657952513e-06, "loss": 17.6121, "step": 26491 }, { "epoch": 0.4842524722613194, "grad_norm": 6.863243618818315, "learning_rate": 5.497345115308366e-06, "loss": 17.7486, "step": 26492 }, { "epoch": 0.48427075145776594, "grad_norm": 6.463231846157784, "learning_rate": 5.497050570921067e-06, "loss": 17.3952, "step": 26493 }, { "epoch": 0.4842890306542124, "grad_norm": 7.895602202068241, "learning_rate": 5.4967560247916516e-06, "loss": 17.902, "step": 26494 }, { "epoch": 0.48430730985065895, "grad_norm": 7.699725467897229, "learning_rate": 5.496461476921147e-06, "loss": 18.0572, "step": 26495 }, { "epoch": 0.4843255890471055, "grad_norm": 6.638513230121819, "learning_rate": 5.4961669273105875e-06, "loss": 17.6035, "step": 26496 }, { "epoch": 0.484343868243552, "grad_norm": 6.281047227787639, "learning_rate": 5.495872375961008e-06, "loss": 16.9826, "step": 26497 }, { "epoch": 0.48436214743999856, "grad_norm": 6.55216091310133, "learning_rate": 5.495577822873439e-06, "loss": 17.9349, "step": 26498 }, { "epoch": 0.48438042663644504, "grad_norm": 5.6852815409141435, "learning_rate": 5.495283268048912e-06, "loss": 16.9378, "step": 26499 }, { "epoch": 0.4843987058328916, "grad_norm": 7.084329263200316, "learning_rate": 5.494988711488458e-06, "loss": 17.6849, "step": 26500 }, { "epoch": 0.4844169850293381, "grad_norm": 4.411131277470196, "learning_rate": 5.4946941531931146e-06, "loss": 16.6473, "step": 26501 }, { "epoch": 0.48443526422578465, "grad_norm": 6.360299954623239, "learning_rate": 5.49439959316391e-06, "loss": 17.5655, "step": 26502 }, { "epoch": 0.4844535434222312, "grad_norm": 7.005715812781231, "learning_rate": 5.494105031401877e-06, "loss": 17.8182, "step": 26503 }, { "epoch": 0.48447182261867766, "grad_norm": 7.065328420310809, "learning_rate": 5.49381046790805e-06, "loss": 17.8284, "step": 26504 }, { "epoch": 0.4844901018151242, "grad_norm": 7.66959744448296, "learning_rate": 5.493515902683459e-06, "loss": 18.0271, "step": 26505 }, { "epoch": 0.48450838101157073, "grad_norm": 6.852720664283129, "learning_rate": 5.493221335729139e-06, "loss": 17.7273, "step": 26506 }, { "epoch": 0.48452666020801727, "grad_norm": 6.1968679770201005, "learning_rate": 5.49292676704612e-06, "loss": 17.4665, "step": 26507 }, { "epoch": 0.4845449394044638, "grad_norm": 6.000632562630498, "learning_rate": 5.492632196635436e-06, "loss": 17.4382, "step": 26508 }, { "epoch": 0.4845632186009103, "grad_norm": 6.07520775388825, "learning_rate": 5.492337624498117e-06, "loss": 17.476, "step": 26509 }, { "epoch": 0.4845814977973568, "grad_norm": 6.671538107506655, "learning_rate": 5.4920430506351995e-06, "loss": 17.4663, "step": 26510 }, { "epoch": 0.48459977699380336, "grad_norm": 5.957206393544445, "learning_rate": 5.491748475047714e-06, "loss": 17.3816, "step": 26511 }, { "epoch": 0.4846180561902499, "grad_norm": 5.550194755608161, "learning_rate": 5.491453897736692e-06, "loss": 17.0009, "step": 26512 }, { "epoch": 0.4846363353866964, "grad_norm": 6.1428089729205695, "learning_rate": 5.491159318703165e-06, "loss": 17.1129, "step": 26513 }, { "epoch": 0.4846546145831429, "grad_norm": 6.285517326065865, "learning_rate": 5.490864737948169e-06, "loss": 17.4101, "step": 26514 }, { "epoch": 0.48467289377958944, "grad_norm": 6.65362113295435, "learning_rate": 5.4905701554727365e-06, "loss": 17.5512, "step": 26515 }, { "epoch": 0.484691172976036, "grad_norm": 6.620467063218983, "learning_rate": 5.490275571277896e-06, "loss": 17.8127, "step": 26516 }, { "epoch": 0.4847094521724825, "grad_norm": 5.786860733968187, "learning_rate": 5.489980985364682e-06, "loss": 17.1823, "step": 26517 }, { "epoch": 0.48472773136892905, "grad_norm": 6.009540133094905, "learning_rate": 5.4896863977341275e-06, "loss": 17.1732, "step": 26518 }, { "epoch": 0.48474601056537553, "grad_norm": 5.942866457705427, "learning_rate": 5.489391808387265e-06, "loss": 17.2876, "step": 26519 }, { "epoch": 0.48476428976182206, "grad_norm": 6.147859989217325, "learning_rate": 5.489097217325127e-06, "loss": 17.4091, "step": 26520 }, { "epoch": 0.4847825689582686, "grad_norm": 5.839743379545554, "learning_rate": 5.4888026245487444e-06, "loss": 17.1816, "step": 26521 }, { "epoch": 0.48480084815471514, "grad_norm": 6.987820665477988, "learning_rate": 5.488508030059152e-06, "loss": 17.4251, "step": 26522 }, { "epoch": 0.4848191273511616, "grad_norm": 6.279484115563279, "learning_rate": 5.488213433857381e-06, "loss": 17.4336, "step": 26523 }, { "epoch": 0.48483740654760815, "grad_norm": 7.064761281317023, "learning_rate": 5.487918835944465e-06, "loss": 17.8791, "step": 26524 }, { "epoch": 0.4848556857440547, "grad_norm": 8.05195396523413, "learning_rate": 5.487624236321435e-06, "loss": 17.53, "step": 26525 }, { "epoch": 0.4848739649405012, "grad_norm": 7.037036320561439, "learning_rate": 5.487329634989325e-06, "loss": 17.7999, "step": 26526 }, { "epoch": 0.48489224413694776, "grad_norm": 5.150846998602031, "learning_rate": 5.487035031949165e-06, "loss": 16.9384, "step": 26527 }, { "epoch": 0.48491052333339424, "grad_norm": 5.853581615381309, "learning_rate": 5.486740427201991e-06, "loss": 17.1325, "step": 26528 }, { "epoch": 0.4849288025298408, "grad_norm": 7.924993206481882, "learning_rate": 5.486445820748835e-06, "loss": 17.6764, "step": 26529 }, { "epoch": 0.4849470817262873, "grad_norm": 7.8504844587924, "learning_rate": 5.486151212590728e-06, "loss": 18.0291, "step": 26530 }, { "epoch": 0.48496536092273385, "grad_norm": 5.809219210938385, "learning_rate": 5.485856602728702e-06, "loss": 17.1463, "step": 26531 }, { "epoch": 0.4849836401191804, "grad_norm": 8.660822147117912, "learning_rate": 5.485561991163791e-06, "loss": 18.2508, "step": 26532 }, { "epoch": 0.48500191931562686, "grad_norm": 6.271816895138902, "learning_rate": 5.485267377897029e-06, "loss": 17.5188, "step": 26533 }, { "epoch": 0.4850201985120734, "grad_norm": 7.230829243055291, "learning_rate": 5.484972762929446e-06, "loss": 17.5166, "step": 26534 }, { "epoch": 0.48503847770851993, "grad_norm": 5.104340002457307, "learning_rate": 5.484678146262075e-06, "loss": 17.0811, "step": 26535 }, { "epoch": 0.48505675690496647, "grad_norm": 6.016658967529224, "learning_rate": 5.484383527895949e-06, "loss": 17.5314, "step": 26536 }, { "epoch": 0.485075036101413, "grad_norm": 6.387333265667369, "learning_rate": 5.484088907832102e-06, "loss": 17.7089, "step": 26537 }, { "epoch": 0.4850933152978595, "grad_norm": 7.233007439047168, "learning_rate": 5.483794286071565e-06, "loss": 17.8259, "step": 26538 }, { "epoch": 0.485111594494306, "grad_norm": 6.07590553003209, "learning_rate": 5.483499662615371e-06, "loss": 17.429, "step": 26539 }, { "epoch": 0.48512987369075256, "grad_norm": 6.550549145304181, "learning_rate": 5.483205037464552e-06, "loss": 17.4891, "step": 26540 }, { "epoch": 0.4851481528871991, "grad_norm": 5.359384545009174, "learning_rate": 5.4829104106201415e-06, "loss": 17.0303, "step": 26541 }, { "epoch": 0.4851664320836456, "grad_norm": 5.337846074159136, "learning_rate": 5.4826157820831715e-06, "loss": 17.0801, "step": 26542 }, { "epoch": 0.4851847112800921, "grad_norm": 6.631213815437908, "learning_rate": 5.482321151854677e-06, "loss": 17.6556, "step": 26543 }, { "epoch": 0.48520299047653864, "grad_norm": 7.722108684275236, "learning_rate": 5.482026519935687e-06, "loss": 17.1154, "step": 26544 }, { "epoch": 0.4852212696729852, "grad_norm": 6.857963631684011, "learning_rate": 5.481731886327235e-06, "loss": 17.8986, "step": 26545 }, { "epoch": 0.4852395488694317, "grad_norm": 5.79207267534587, "learning_rate": 5.481437251030357e-06, "loss": 17.3041, "step": 26546 }, { "epoch": 0.48525782806587825, "grad_norm": 6.3787164053305245, "learning_rate": 5.4811426140460825e-06, "loss": 17.3586, "step": 26547 }, { "epoch": 0.48527610726232473, "grad_norm": 8.393542094603822, "learning_rate": 5.480847975375444e-06, "loss": 18.3381, "step": 26548 }, { "epoch": 0.48529438645877127, "grad_norm": 6.574024373888554, "learning_rate": 5.480553335019475e-06, "loss": 17.5777, "step": 26549 }, { "epoch": 0.4853126656552178, "grad_norm": 5.98329693173344, "learning_rate": 5.4802586929792086e-06, "loss": 17.3316, "step": 26550 }, { "epoch": 0.48533094485166434, "grad_norm": 6.985402588071238, "learning_rate": 5.479964049255677e-06, "loss": 17.6135, "step": 26551 }, { "epoch": 0.48534922404811087, "grad_norm": 6.4466575683269935, "learning_rate": 5.479669403849913e-06, "loss": 17.6279, "step": 26552 }, { "epoch": 0.48536750324455735, "grad_norm": 5.189915395214789, "learning_rate": 5.4793747567629504e-06, "loss": 17.1822, "step": 26553 }, { "epoch": 0.4853857824410039, "grad_norm": 6.067557612326647, "learning_rate": 5.47908010799582e-06, "loss": 17.5608, "step": 26554 }, { "epoch": 0.4854040616374504, "grad_norm": 7.594750565730956, "learning_rate": 5.478785457549555e-06, "loss": 17.7572, "step": 26555 }, { "epoch": 0.48542234083389696, "grad_norm": 6.435276649579518, "learning_rate": 5.478490805425191e-06, "loss": 17.6635, "step": 26556 }, { "epoch": 0.48544062003034344, "grad_norm": 7.11158491200244, "learning_rate": 5.4781961516237555e-06, "loss": 17.7047, "step": 26557 }, { "epoch": 0.48545889922679, "grad_norm": 4.733950175664281, "learning_rate": 5.477901496146285e-06, "loss": 16.7589, "step": 26558 }, { "epoch": 0.4854771784232365, "grad_norm": 7.0080832196589276, "learning_rate": 5.47760683899381e-06, "loss": 17.9338, "step": 26559 }, { "epoch": 0.48549545761968305, "grad_norm": 5.0607686591002965, "learning_rate": 5.477312180167366e-06, "loss": 16.995, "step": 26560 }, { "epoch": 0.4855137368161296, "grad_norm": 6.137308209246509, "learning_rate": 5.4770175196679845e-06, "loss": 17.2181, "step": 26561 }, { "epoch": 0.48553201601257606, "grad_norm": 6.7271861283140755, "learning_rate": 5.4767228574966965e-06, "loss": 17.5663, "step": 26562 }, { "epoch": 0.4855502952090226, "grad_norm": 5.148819457273331, "learning_rate": 5.476428193654537e-06, "loss": 16.822, "step": 26563 }, { "epoch": 0.48556857440546913, "grad_norm": 7.003527994582009, "learning_rate": 5.4761335281425375e-06, "loss": 17.6496, "step": 26564 }, { "epoch": 0.48558685360191567, "grad_norm": 8.40255245863197, "learning_rate": 5.475838860961732e-06, "loss": 17.9476, "step": 26565 }, { "epoch": 0.4856051327983622, "grad_norm": 6.653719698463278, "learning_rate": 5.475544192113152e-06, "loss": 17.3842, "step": 26566 }, { "epoch": 0.4856234119948087, "grad_norm": 5.443807941247819, "learning_rate": 5.475249521597833e-06, "loss": 16.907, "step": 26567 }, { "epoch": 0.4856416911912552, "grad_norm": 5.86720973065785, "learning_rate": 5.474954849416802e-06, "loss": 17.2419, "step": 26568 }, { "epoch": 0.48565997038770176, "grad_norm": 6.2895047620896065, "learning_rate": 5.474660175571098e-06, "loss": 17.3104, "step": 26569 }, { "epoch": 0.4856782495841483, "grad_norm": 7.6818301270678155, "learning_rate": 5.4743655000617515e-06, "loss": 18.2199, "step": 26570 }, { "epoch": 0.4856965287805948, "grad_norm": 7.084022234762375, "learning_rate": 5.474070822889795e-06, "loss": 18.0019, "step": 26571 }, { "epoch": 0.4857148079770413, "grad_norm": 6.868505220168283, "learning_rate": 5.47377614405626e-06, "loss": 17.9504, "step": 26572 }, { "epoch": 0.48573308717348784, "grad_norm": 6.6229483247928735, "learning_rate": 5.47348146356218e-06, "loss": 17.7776, "step": 26573 }, { "epoch": 0.4857513663699344, "grad_norm": 6.440645746533691, "learning_rate": 5.473186781408591e-06, "loss": 17.5428, "step": 26574 }, { "epoch": 0.4857696455663809, "grad_norm": 7.515916814424151, "learning_rate": 5.4728920975965214e-06, "loss": 17.715, "step": 26575 }, { "epoch": 0.48578792476282745, "grad_norm": 6.886688046334952, "learning_rate": 5.472597412127008e-06, "loss": 17.5188, "step": 26576 }, { "epoch": 0.48580620395927393, "grad_norm": 6.25423326557669, "learning_rate": 5.47230272500108e-06, "loss": 17.1894, "step": 26577 }, { "epoch": 0.48582448315572047, "grad_norm": 5.149576858402395, "learning_rate": 5.472008036219772e-06, "loss": 17.0235, "step": 26578 }, { "epoch": 0.485842762352167, "grad_norm": 5.221973513778705, "learning_rate": 5.471713345784118e-06, "loss": 16.9643, "step": 26579 }, { "epoch": 0.48586104154861354, "grad_norm": 5.60671239177601, "learning_rate": 5.471418653695149e-06, "loss": 17.1331, "step": 26580 }, { "epoch": 0.4858793207450601, "grad_norm": 8.106985399703962, "learning_rate": 5.471123959953898e-06, "loss": 17.6359, "step": 26581 }, { "epoch": 0.48589759994150655, "grad_norm": 7.437369590931693, "learning_rate": 5.4708292645613995e-06, "loss": 17.9801, "step": 26582 }, { "epoch": 0.4859158791379531, "grad_norm": 5.063156408596951, "learning_rate": 5.470534567518684e-06, "loss": 16.9018, "step": 26583 }, { "epoch": 0.4859341583343996, "grad_norm": 9.493058215283343, "learning_rate": 5.470239868826788e-06, "loss": 17.5201, "step": 26584 }, { "epoch": 0.48595243753084616, "grad_norm": 6.557149756489445, "learning_rate": 5.46994516848674e-06, "loss": 17.3902, "step": 26585 }, { "epoch": 0.4859707167272927, "grad_norm": 7.111068340274383, "learning_rate": 5.469650466499574e-06, "loss": 17.8025, "step": 26586 }, { "epoch": 0.4859889959237392, "grad_norm": 7.015342299448457, "learning_rate": 5.469355762866327e-06, "loss": 17.7311, "step": 26587 }, { "epoch": 0.4860072751201857, "grad_norm": 5.7164801045758695, "learning_rate": 5.469061057588027e-06, "loss": 17.3174, "step": 26588 }, { "epoch": 0.48602555431663225, "grad_norm": 5.642670381011369, "learning_rate": 5.468766350665709e-06, "loss": 17.0686, "step": 26589 }, { "epoch": 0.4860438335130788, "grad_norm": 8.514006608723344, "learning_rate": 5.468471642100404e-06, "loss": 17.354, "step": 26590 }, { "epoch": 0.48606211270952526, "grad_norm": 6.285525586919754, "learning_rate": 5.468176931893149e-06, "loss": 17.5392, "step": 26591 }, { "epoch": 0.4860803919059718, "grad_norm": 4.87026401636559, "learning_rate": 5.467882220044973e-06, "loss": 17.0176, "step": 26592 }, { "epoch": 0.48609867110241833, "grad_norm": 6.604639662487142, "learning_rate": 5.467587506556911e-06, "loss": 17.5402, "step": 26593 }, { "epoch": 0.48611695029886487, "grad_norm": 7.691647893381333, "learning_rate": 5.467292791429997e-06, "loss": 17.6207, "step": 26594 }, { "epoch": 0.4861352294953114, "grad_norm": 5.898843312344089, "learning_rate": 5.466998074665259e-06, "loss": 17.2536, "step": 26595 }, { "epoch": 0.4861535086917579, "grad_norm": 7.573039342632927, "learning_rate": 5.466703356263734e-06, "loss": 17.5114, "step": 26596 }, { "epoch": 0.4861717878882044, "grad_norm": 5.101249641526701, "learning_rate": 5.466408636226455e-06, "loss": 16.9151, "step": 26597 }, { "epoch": 0.48619006708465096, "grad_norm": 5.8520661688430575, "learning_rate": 5.466113914554455e-06, "loss": 17.3912, "step": 26598 }, { "epoch": 0.4862083462810975, "grad_norm": 6.020245283050912, "learning_rate": 5.465819191248766e-06, "loss": 17.3466, "step": 26599 }, { "epoch": 0.486226625477544, "grad_norm": 5.594927094598268, "learning_rate": 5.465524466310419e-06, "loss": 17.1538, "step": 26600 }, { "epoch": 0.4862449046739905, "grad_norm": 7.47649543567741, "learning_rate": 5.465229739740452e-06, "loss": 18.611, "step": 26601 }, { "epoch": 0.48626318387043704, "grad_norm": 5.2141971011979855, "learning_rate": 5.464935011539894e-06, "loss": 17.0901, "step": 26602 }, { "epoch": 0.4862814630668836, "grad_norm": 6.845905298465474, "learning_rate": 5.464640281709779e-06, "loss": 17.6799, "step": 26603 }, { "epoch": 0.4862997422633301, "grad_norm": 6.394076981508338, "learning_rate": 5.46434555025114e-06, "loss": 17.6783, "step": 26604 }, { "epoch": 0.48631802145977665, "grad_norm": 5.506820170598393, "learning_rate": 5.464050817165009e-06, "loss": 16.9771, "step": 26605 }, { "epoch": 0.48633630065622313, "grad_norm": 7.344541839171006, "learning_rate": 5.463756082452423e-06, "loss": 17.163, "step": 26606 }, { "epoch": 0.48635457985266967, "grad_norm": 6.527649923861331, "learning_rate": 5.463461346114409e-06, "loss": 17.3594, "step": 26607 }, { "epoch": 0.4863728590491162, "grad_norm": 5.226281276789873, "learning_rate": 5.463166608152005e-06, "loss": 17.0518, "step": 26608 }, { "epoch": 0.48639113824556274, "grad_norm": 5.9010886221186505, "learning_rate": 5.462871868566242e-06, "loss": 17.4303, "step": 26609 }, { "epoch": 0.4864094174420093, "grad_norm": 7.05978247832892, "learning_rate": 5.462577127358154e-06, "loss": 17.7988, "step": 26610 }, { "epoch": 0.48642769663845575, "grad_norm": 7.197769770156895, "learning_rate": 5.462282384528773e-06, "loss": 18.1668, "step": 26611 }, { "epoch": 0.4864459758349023, "grad_norm": 7.142218245417265, "learning_rate": 5.461987640079132e-06, "loss": 17.8786, "step": 26612 }, { "epoch": 0.4864642550313488, "grad_norm": 6.449349589955589, "learning_rate": 5.461692894010263e-06, "loss": 17.7673, "step": 26613 }, { "epoch": 0.48648253422779536, "grad_norm": 6.426401988205395, "learning_rate": 5.461398146323202e-06, "loss": 17.8083, "step": 26614 }, { "epoch": 0.4865008134242419, "grad_norm": 7.282501321645925, "learning_rate": 5.461103397018981e-06, "loss": 17.8547, "step": 26615 }, { "epoch": 0.4865190926206884, "grad_norm": 6.3285525513507555, "learning_rate": 5.4608086460986324e-06, "loss": 17.5695, "step": 26616 }, { "epoch": 0.4865373718171349, "grad_norm": 6.616748535093236, "learning_rate": 5.460513893563189e-06, "loss": 17.4545, "step": 26617 }, { "epoch": 0.48655565101358145, "grad_norm": 6.015220408534, "learning_rate": 5.460219139413684e-06, "loss": 17.2147, "step": 26618 }, { "epoch": 0.486573930210028, "grad_norm": 6.001736263614366, "learning_rate": 5.4599243836511516e-06, "loss": 17.3096, "step": 26619 }, { "epoch": 0.4865922094064745, "grad_norm": 7.029744272317311, "learning_rate": 5.459629626276624e-06, "loss": 17.556, "step": 26620 }, { "epoch": 0.486610488602921, "grad_norm": 6.365931427236614, "learning_rate": 5.459334867291134e-06, "loss": 17.6441, "step": 26621 }, { "epoch": 0.48662876779936753, "grad_norm": 7.774243933733844, "learning_rate": 5.459040106695716e-06, "loss": 18.2137, "step": 26622 }, { "epoch": 0.48664704699581407, "grad_norm": 5.9603038412986376, "learning_rate": 5.4587453444914e-06, "loss": 17.4149, "step": 26623 }, { "epoch": 0.4866653261922606, "grad_norm": 5.659576186668394, "learning_rate": 5.458450580679223e-06, "loss": 17.3379, "step": 26624 }, { "epoch": 0.4866836053887071, "grad_norm": 6.196018730642399, "learning_rate": 5.458155815260219e-06, "loss": 17.4753, "step": 26625 }, { "epoch": 0.4867018845851536, "grad_norm": 7.024416450972192, "learning_rate": 5.457861048235416e-06, "loss": 17.7739, "step": 26626 }, { "epoch": 0.48672016378160016, "grad_norm": 5.400957036917106, "learning_rate": 5.457566279605848e-06, "loss": 17.0448, "step": 26627 }, { "epoch": 0.4867384429780467, "grad_norm": 5.8155318177455575, "learning_rate": 5.4572715093725515e-06, "loss": 17.4156, "step": 26628 }, { "epoch": 0.48675672217449323, "grad_norm": 5.562979741031682, "learning_rate": 5.45697673753656e-06, "loss": 17.1216, "step": 26629 }, { "epoch": 0.4867750013709397, "grad_norm": 7.12242779394732, "learning_rate": 5.456681964098902e-06, "loss": 17.4697, "step": 26630 }, { "epoch": 0.48679328056738624, "grad_norm": 5.889235737164289, "learning_rate": 5.4563871890606145e-06, "loss": 17.4843, "step": 26631 }, { "epoch": 0.4868115597638328, "grad_norm": 6.642818958598797, "learning_rate": 5.45609241242273e-06, "loss": 17.4714, "step": 26632 }, { "epoch": 0.4868298389602793, "grad_norm": 5.585845806917995, "learning_rate": 5.45579763418628e-06, "loss": 17.255, "step": 26633 }, { "epoch": 0.48684811815672585, "grad_norm": 5.393327362257041, "learning_rate": 5.455502854352299e-06, "loss": 17.2974, "step": 26634 }, { "epoch": 0.48686639735317233, "grad_norm": 7.314095707053459, "learning_rate": 5.455208072921821e-06, "loss": 17.8221, "step": 26635 }, { "epoch": 0.48688467654961887, "grad_norm": 7.0350601960001535, "learning_rate": 5.454913289895878e-06, "loss": 17.6477, "step": 26636 }, { "epoch": 0.4869029557460654, "grad_norm": 6.931914639525556, "learning_rate": 5.454618505275503e-06, "loss": 17.7597, "step": 26637 }, { "epoch": 0.48692123494251194, "grad_norm": 7.275927073570928, "learning_rate": 5.454323719061729e-06, "loss": 17.9571, "step": 26638 }, { "epoch": 0.4869395141389585, "grad_norm": 5.102907014372395, "learning_rate": 5.454028931255592e-06, "loss": 16.9075, "step": 26639 }, { "epoch": 0.48695779333540495, "grad_norm": 5.730347467741487, "learning_rate": 5.45373414185812e-06, "loss": 17.3043, "step": 26640 }, { "epoch": 0.4869760725318515, "grad_norm": 7.3836310643565914, "learning_rate": 5.453439350870351e-06, "loss": 17.4446, "step": 26641 }, { "epoch": 0.486994351728298, "grad_norm": 5.020498286082509, "learning_rate": 5.453144558293315e-06, "loss": 16.9975, "step": 26642 }, { "epoch": 0.48701263092474456, "grad_norm": 8.013904751478776, "learning_rate": 5.452849764128049e-06, "loss": 18.1751, "step": 26643 }, { "epoch": 0.4870309101211911, "grad_norm": 7.2824823067803175, "learning_rate": 5.452554968375583e-06, "loss": 17.982, "step": 26644 }, { "epoch": 0.4870491893176376, "grad_norm": 6.719829505022645, "learning_rate": 5.452260171036949e-06, "loss": 17.6701, "step": 26645 }, { "epoch": 0.4870674685140841, "grad_norm": 5.584415576101518, "learning_rate": 5.451965372113185e-06, "loss": 17.0621, "step": 26646 }, { "epoch": 0.48708574771053065, "grad_norm": 6.707432748749226, "learning_rate": 5.451670571605321e-06, "loss": 17.4253, "step": 26647 }, { "epoch": 0.4871040269069772, "grad_norm": 7.062764180772722, "learning_rate": 5.45137576951439e-06, "loss": 17.8118, "step": 26648 }, { "epoch": 0.4871223061034237, "grad_norm": 9.202990816780765, "learning_rate": 5.451080965841427e-06, "loss": 17.9016, "step": 26649 }, { "epoch": 0.4871405852998702, "grad_norm": 6.32233053160526, "learning_rate": 5.450786160587463e-06, "loss": 17.4882, "step": 26650 }, { "epoch": 0.48715886449631673, "grad_norm": 6.069334385436446, "learning_rate": 5.450491353753533e-06, "loss": 17.2714, "step": 26651 }, { "epoch": 0.48717714369276327, "grad_norm": 6.147846392541621, "learning_rate": 5.450196545340671e-06, "loss": 17.4155, "step": 26652 }, { "epoch": 0.4871954228892098, "grad_norm": 6.07501178327118, "learning_rate": 5.4499017353499095e-06, "loss": 17.3031, "step": 26653 }, { "epoch": 0.48721370208565634, "grad_norm": 4.661875821745856, "learning_rate": 5.449606923782279e-06, "loss": 16.7665, "step": 26654 }, { "epoch": 0.4872319812821028, "grad_norm": 6.967972881934136, "learning_rate": 5.449312110638817e-06, "loss": 17.7271, "step": 26655 }, { "epoch": 0.48725026047854936, "grad_norm": 8.872333170762872, "learning_rate": 5.449017295920556e-06, "loss": 17.9727, "step": 26656 }, { "epoch": 0.4872685396749959, "grad_norm": 6.082674160655565, "learning_rate": 5.448722479628528e-06, "loss": 17.541, "step": 26657 }, { "epoch": 0.48728681887144243, "grad_norm": 5.8644745001181775, "learning_rate": 5.448427661763766e-06, "loss": 17.274, "step": 26658 }, { "epoch": 0.4873050980678889, "grad_norm": 8.562532643994132, "learning_rate": 5.448132842327302e-06, "loss": 18.3781, "step": 26659 }, { "epoch": 0.48732337726433544, "grad_norm": 7.318551635687446, "learning_rate": 5.447838021320173e-06, "loss": 17.994, "step": 26660 }, { "epoch": 0.487341656460782, "grad_norm": 7.048067486365951, "learning_rate": 5.447543198743411e-06, "loss": 17.6905, "step": 26661 }, { "epoch": 0.4873599356572285, "grad_norm": 7.088445178297393, "learning_rate": 5.447248374598049e-06, "loss": 17.5954, "step": 26662 }, { "epoch": 0.48737821485367505, "grad_norm": 6.056738429756884, "learning_rate": 5.44695354888512e-06, "loss": 17.2938, "step": 26663 }, { "epoch": 0.48739649405012153, "grad_norm": 5.597497715489482, "learning_rate": 5.446658721605657e-06, "loss": 17.4139, "step": 26664 }, { "epoch": 0.48741477324656807, "grad_norm": 7.8833620968787725, "learning_rate": 5.446363892760694e-06, "loss": 18.2431, "step": 26665 }, { "epoch": 0.4874330524430146, "grad_norm": 5.829542433580899, "learning_rate": 5.446069062351265e-06, "loss": 17.4375, "step": 26666 }, { "epoch": 0.48745133163946114, "grad_norm": 7.326167912127498, "learning_rate": 5.445774230378402e-06, "loss": 18.1475, "step": 26667 }, { "epoch": 0.4874696108359077, "grad_norm": 8.159616676986518, "learning_rate": 5.44547939684314e-06, "loss": 17.8864, "step": 26668 }, { "epoch": 0.48748789003235415, "grad_norm": 6.733878280865458, "learning_rate": 5.445184561746511e-06, "loss": 17.5365, "step": 26669 }, { "epoch": 0.4875061692288007, "grad_norm": 6.150416170308892, "learning_rate": 5.444889725089548e-06, "loss": 17.3496, "step": 26670 }, { "epoch": 0.4875244484252472, "grad_norm": 5.852591058993894, "learning_rate": 5.444594886873286e-06, "loss": 17.3277, "step": 26671 }, { "epoch": 0.48754272762169376, "grad_norm": 7.050355774784577, "learning_rate": 5.444300047098756e-06, "loss": 17.8252, "step": 26672 }, { "epoch": 0.4875610068181403, "grad_norm": 5.101136321964566, "learning_rate": 5.4440052057669934e-06, "loss": 16.9262, "step": 26673 }, { "epoch": 0.4875792860145868, "grad_norm": 6.171134819997972, "learning_rate": 5.443710362879033e-06, "loss": 17.3672, "step": 26674 }, { "epoch": 0.4875975652110333, "grad_norm": 8.424747184849217, "learning_rate": 5.443415518435905e-06, "loss": 18.4778, "step": 26675 }, { "epoch": 0.48761584440747985, "grad_norm": 6.663080208183747, "learning_rate": 5.443120672438643e-06, "loss": 17.5707, "step": 26676 }, { "epoch": 0.4876341236039264, "grad_norm": 5.795464634344631, "learning_rate": 5.442825824888283e-06, "loss": 17.2108, "step": 26677 }, { "epoch": 0.4876524028003729, "grad_norm": 5.469798270017111, "learning_rate": 5.442530975785855e-06, "loss": 17.1255, "step": 26678 }, { "epoch": 0.4876706819968194, "grad_norm": 7.341864107316959, "learning_rate": 5.442236125132397e-06, "loss": 17.7743, "step": 26679 }, { "epoch": 0.48768896119326594, "grad_norm": 6.446259241788843, "learning_rate": 5.441941272928939e-06, "loss": 17.5795, "step": 26680 }, { "epoch": 0.48770724038971247, "grad_norm": 6.32346501154039, "learning_rate": 5.441646419176514e-06, "loss": 17.399, "step": 26681 }, { "epoch": 0.487725519586159, "grad_norm": 7.253957061840832, "learning_rate": 5.441351563876157e-06, "loss": 17.7322, "step": 26682 }, { "epoch": 0.48774379878260554, "grad_norm": 8.251096577384583, "learning_rate": 5.441056707028901e-06, "loss": 17.5844, "step": 26683 }, { "epoch": 0.487762077979052, "grad_norm": 8.309287226384034, "learning_rate": 5.440761848635781e-06, "loss": 18.3265, "step": 26684 }, { "epoch": 0.48778035717549856, "grad_norm": 6.478423372830788, "learning_rate": 5.440466988697828e-06, "loss": 17.7607, "step": 26685 }, { "epoch": 0.4877986363719451, "grad_norm": 6.46679214652241, "learning_rate": 5.440172127216076e-06, "loss": 17.3941, "step": 26686 }, { "epoch": 0.48781691556839163, "grad_norm": 6.592282836976066, "learning_rate": 5.43987726419156e-06, "loss": 17.6032, "step": 26687 }, { "epoch": 0.48783519476483816, "grad_norm": 6.687011106745191, "learning_rate": 5.439582399625311e-06, "loss": 17.6042, "step": 26688 }, { "epoch": 0.48785347396128464, "grad_norm": 6.168841621335481, "learning_rate": 5.439287533518364e-06, "loss": 17.363, "step": 26689 }, { "epoch": 0.4878717531577312, "grad_norm": 5.826934054760655, "learning_rate": 5.438992665871753e-06, "loss": 17.2163, "step": 26690 }, { "epoch": 0.4878900323541777, "grad_norm": 6.508074954201721, "learning_rate": 5.438697796686511e-06, "loss": 17.7818, "step": 26691 }, { "epoch": 0.48790831155062425, "grad_norm": 5.347730389877579, "learning_rate": 5.438402925963669e-06, "loss": 17.0873, "step": 26692 }, { "epoch": 0.48792659074707073, "grad_norm": 5.735453219002497, "learning_rate": 5.4381080537042655e-06, "loss": 17.0578, "step": 26693 }, { "epoch": 0.48794486994351727, "grad_norm": 5.871442657524678, "learning_rate": 5.437813179909332e-06, "loss": 17.3047, "step": 26694 }, { "epoch": 0.4879631491399638, "grad_norm": 6.4683578382875435, "learning_rate": 5.437518304579898e-06, "loss": 17.4914, "step": 26695 }, { "epoch": 0.48798142833641034, "grad_norm": 5.744793784154497, "learning_rate": 5.437223427717001e-06, "loss": 17.2997, "step": 26696 }, { "epoch": 0.4879997075328569, "grad_norm": 5.867712906536801, "learning_rate": 5.436928549321675e-06, "loss": 16.928, "step": 26697 }, { "epoch": 0.48801798672930335, "grad_norm": 6.391154608621728, "learning_rate": 5.436633669394953e-06, "loss": 17.5694, "step": 26698 }, { "epoch": 0.4880362659257499, "grad_norm": 5.955561226736121, "learning_rate": 5.436338787937867e-06, "loss": 17.1313, "step": 26699 }, { "epoch": 0.4880545451221964, "grad_norm": 5.836660560356295, "learning_rate": 5.43604390495145e-06, "loss": 17.2105, "step": 26700 }, { "epoch": 0.48807282431864296, "grad_norm": 6.2484085719067295, "learning_rate": 5.43574902043674e-06, "loss": 17.5134, "step": 26701 }, { "epoch": 0.4880911035150895, "grad_norm": 7.114590802442527, "learning_rate": 5.435454134394765e-06, "loss": 17.5665, "step": 26702 }, { "epoch": 0.488109382711536, "grad_norm": 7.373815281828023, "learning_rate": 5.4351592468265625e-06, "loss": 17.5261, "step": 26703 }, { "epoch": 0.4881276619079825, "grad_norm": 8.493552962876162, "learning_rate": 5.434864357733164e-06, "loss": 18.0959, "step": 26704 }, { "epoch": 0.48814594110442905, "grad_norm": 6.985858148092461, "learning_rate": 5.434569467115604e-06, "loss": 17.6583, "step": 26705 }, { "epoch": 0.4881642203008756, "grad_norm": 6.05518898261649, "learning_rate": 5.434274574974915e-06, "loss": 17.3173, "step": 26706 }, { "epoch": 0.4881824994973221, "grad_norm": 7.075867296689358, "learning_rate": 5.433979681312131e-06, "loss": 17.7777, "step": 26707 }, { "epoch": 0.4882007786937686, "grad_norm": 7.078355166872703, "learning_rate": 5.433684786128287e-06, "loss": 18.022, "step": 26708 }, { "epoch": 0.48821905789021514, "grad_norm": 6.155883790054182, "learning_rate": 5.433389889424416e-06, "loss": 17.5531, "step": 26709 }, { "epoch": 0.48823733708666167, "grad_norm": 7.2286511400033175, "learning_rate": 5.433094991201549e-06, "loss": 17.9021, "step": 26710 }, { "epoch": 0.4882556162831082, "grad_norm": 4.98670601372043, "learning_rate": 5.432800091460723e-06, "loss": 16.9237, "step": 26711 }, { "epoch": 0.48827389547955474, "grad_norm": 7.6114791374891695, "learning_rate": 5.43250519020297e-06, "loss": 18.0662, "step": 26712 }, { "epoch": 0.4882921746760012, "grad_norm": 7.510468745586594, "learning_rate": 5.432210287429324e-06, "loss": 17.7714, "step": 26713 }, { "epoch": 0.48831045387244776, "grad_norm": 6.251969574222383, "learning_rate": 5.4319153831408176e-06, "loss": 17.5097, "step": 26714 }, { "epoch": 0.4883287330688943, "grad_norm": 7.079435065952679, "learning_rate": 5.4316204773384865e-06, "loss": 17.6973, "step": 26715 }, { "epoch": 0.48834701226534083, "grad_norm": 7.955138314761039, "learning_rate": 5.431325570023362e-06, "loss": 17.8779, "step": 26716 }, { "epoch": 0.48836529146178737, "grad_norm": 6.933085599949644, "learning_rate": 5.431030661196481e-06, "loss": 17.819, "step": 26717 }, { "epoch": 0.48838357065823385, "grad_norm": 6.230553472893436, "learning_rate": 5.430735750858872e-06, "loss": 17.508, "step": 26718 }, { "epoch": 0.4884018498546804, "grad_norm": 5.705069411832746, "learning_rate": 5.430440839011572e-06, "loss": 17.1437, "step": 26719 }, { "epoch": 0.4884201290511269, "grad_norm": 7.904679814923992, "learning_rate": 5.430145925655616e-06, "loss": 18.4188, "step": 26720 }, { "epoch": 0.48843840824757345, "grad_norm": 5.547515061694185, "learning_rate": 5.4298510107920345e-06, "loss": 17.1844, "step": 26721 }, { "epoch": 0.48845668744402, "grad_norm": 5.882950277228873, "learning_rate": 5.429556094421863e-06, "loss": 17.1431, "step": 26722 }, { "epoch": 0.48847496664046647, "grad_norm": 5.959155236936194, "learning_rate": 5.429261176546134e-06, "loss": 17.1748, "step": 26723 }, { "epoch": 0.488493245836913, "grad_norm": 6.085907969524742, "learning_rate": 5.428966257165882e-06, "loss": 17.637, "step": 26724 }, { "epoch": 0.48851152503335954, "grad_norm": 5.442096106554661, "learning_rate": 5.428671336282142e-06, "loss": 17.1573, "step": 26725 }, { "epoch": 0.4885298042298061, "grad_norm": 6.044868232271571, "learning_rate": 5.428376413895945e-06, "loss": 17.719, "step": 26726 }, { "epoch": 0.48854808342625256, "grad_norm": 6.206093193676755, "learning_rate": 5.428081490008325e-06, "loss": 17.208, "step": 26727 }, { "epoch": 0.4885663626226991, "grad_norm": 4.799014605211223, "learning_rate": 5.427786564620318e-06, "loss": 16.8918, "step": 26728 }, { "epoch": 0.4885846418191456, "grad_norm": 6.832359469861618, "learning_rate": 5.4274916377329556e-06, "loss": 17.8295, "step": 26729 }, { "epoch": 0.48860292101559216, "grad_norm": 6.656490306847209, "learning_rate": 5.427196709347272e-06, "loss": 17.855, "step": 26730 }, { "epoch": 0.4886212002120387, "grad_norm": 5.792625739089165, "learning_rate": 5.4269017794643006e-06, "loss": 17.1727, "step": 26731 }, { "epoch": 0.4886394794084852, "grad_norm": 6.971258159023458, "learning_rate": 5.426606848085076e-06, "loss": 17.5696, "step": 26732 }, { "epoch": 0.4886577586049317, "grad_norm": 5.25932462163262, "learning_rate": 5.426311915210633e-06, "loss": 16.901, "step": 26733 }, { "epoch": 0.48867603780137825, "grad_norm": 7.260817596924251, "learning_rate": 5.426016980842002e-06, "loss": 18.1982, "step": 26734 }, { "epoch": 0.4886943169978248, "grad_norm": 8.153119422459952, "learning_rate": 5.425722044980217e-06, "loss": 18.3199, "step": 26735 }, { "epoch": 0.4887125961942713, "grad_norm": 6.048124673936369, "learning_rate": 5.425427107626316e-06, "loss": 17.2398, "step": 26736 }, { "epoch": 0.4887308753907178, "grad_norm": 7.214994033609668, "learning_rate": 5.425132168781328e-06, "loss": 17.6843, "step": 26737 }, { "epoch": 0.48874915458716434, "grad_norm": 6.063501555656492, "learning_rate": 5.424837228446289e-06, "loss": 17.4596, "step": 26738 }, { "epoch": 0.48876743378361087, "grad_norm": 5.529805401716335, "learning_rate": 5.424542286622234e-06, "loss": 17.1915, "step": 26739 }, { "epoch": 0.4887857129800574, "grad_norm": 5.316192857659175, "learning_rate": 5.424247343310193e-06, "loss": 17.0905, "step": 26740 }, { "epoch": 0.48880399217650394, "grad_norm": 7.14134986856988, "learning_rate": 5.423952398511202e-06, "loss": 17.7756, "step": 26741 }, { "epoch": 0.4888222713729504, "grad_norm": 5.6608215942570155, "learning_rate": 5.4236574522262955e-06, "loss": 17.1622, "step": 26742 }, { "epoch": 0.48884055056939696, "grad_norm": 7.754954075097137, "learning_rate": 5.4233625044565075e-06, "loss": 17.5948, "step": 26743 }, { "epoch": 0.4888588297658435, "grad_norm": 6.304006626992727, "learning_rate": 5.4230675552028686e-06, "loss": 17.4356, "step": 26744 }, { "epoch": 0.48887710896229003, "grad_norm": 7.6598036781548435, "learning_rate": 5.4227726044664154e-06, "loss": 18.1548, "step": 26745 }, { "epoch": 0.48889538815873657, "grad_norm": 5.043567632997474, "learning_rate": 5.4224776522481815e-06, "loss": 16.9123, "step": 26746 }, { "epoch": 0.48891366735518305, "grad_norm": 7.4474356120256795, "learning_rate": 5.4221826985491984e-06, "loss": 17.4138, "step": 26747 }, { "epoch": 0.4889319465516296, "grad_norm": 6.580050828900662, "learning_rate": 5.421887743370503e-06, "loss": 17.678, "step": 26748 }, { "epoch": 0.4889502257480761, "grad_norm": 5.914436484098127, "learning_rate": 5.421592786713128e-06, "loss": 17.3221, "step": 26749 }, { "epoch": 0.48896850494452265, "grad_norm": 7.040745101047383, "learning_rate": 5.421297828578106e-06, "loss": 18.0544, "step": 26750 }, { "epoch": 0.4889867841409692, "grad_norm": 5.439251177141843, "learning_rate": 5.4210028689664715e-06, "loss": 17.0645, "step": 26751 }, { "epoch": 0.48900506333741567, "grad_norm": 7.458356758409796, "learning_rate": 5.420707907879259e-06, "loss": 17.536, "step": 26752 }, { "epoch": 0.4890233425338622, "grad_norm": 6.508491627841003, "learning_rate": 5.420412945317502e-06, "loss": 17.4326, "step": 26753 }, { "epoch": 0.48904162173030874, "grad_norm": 6.51150504386778, "learning_rate": 5.420117981282234e-06, "loss": 17.5608, "step": 26754 }, { "epoch": 0.4890599009267553, "grad_norm": 6.116798502026182, "learning_rate": 5.419823015774488e-06, "loss": 17.3379, "step": 26755 }, { "epoch": 0.4890781801232018, "grad_norm": 7.176310260618286, "learning_rate": 5.419528048795301e-06, "loss": 17.9868, "step": 26756 }, { "epoch": 0.4890964593196483, "grad_norm": 6.910287149539001, "learning_rate": 5.419233080345702e-06, "loss": 17.8103, "step": 26757 }, { "epoch": 0.4891147385160948, "grad_norm": 6.065923872504756, "learning_rate": 5.418938110426729e-06, "loss": 17.4823, "step": 26758 }, { "epoch": 0.48913301771254136, "grad_norm": 5.272226781807781, "learning_rate": 5.4186431390394124e-06, "loss": 16.9866, "step": 26759 }, { "epoch": 0.4891512969089879, "grad_norm": 6.766457150853006, "learning_rate": 5.41834816618479e-06, "loss": 17.6785, "step": 26760 }, { "epoch": 0.4891695761054344, "grad_norm": 5.990333341937009, "learning_rate": 5.418053191863893e-06, "loss": 17.3857, "step": 26761 }, { "epoch": 0.4891878553018809, "grad_norm": 4.845768868473213, "learning_rate": 5.417758216077756e-06, "loss": 16.8998, "step": 26762 }, { "epoch": 0.48920613449832745, "grad_norm": 6.316455854819885, "learning_rate": 5.417463238827413e-06, "loss": 17.5497, "step": 26763 }, { "epoch": 0.489224413694774, "grad_norm": 5.425386887917177, "learning_rate": 5.417168260113896e-06, "loss": 16.943, "step": 26764 }, { "epoch": 0.4892426928912205, "grad_norm": 6.336866161571187, "learning_rate": 5.416873279938241e-06, "loss": 17.459, "step": 26765 }, { "epoch": 0.489260972087667, "grad_norm": 7.594505821619773, "learning_rate": 5.4165782983014825e-06, "loss": 17.9707, "step": 26766 }, { "epoch": 0.48927925128411354, "grad_norm": 6.511043350662532, "learning_rate": 5.416283315204652e-06, "loss": 17.4195, "step": 26767 }, { "epoch": 0.4892975304805601, "grad_norm": 6.222490785112991, "learning_rate": 5.415988330648785e-06, "loss": 17.5351, "step": 26768 }, { "epoch": 0.4893158096770066, "grad_norm": 6.5235654266904755, "learning_rate": 5.415693344634916e-06, "loss": 17.4953, "step": 26769 }, { "epoch": 0.48933408887345314, "grad_norm": 7.537106258040323, "learning_rate": 5.415398357164078e-06, "loss": 17.7087, "step": 26770 }, { "epoch": 0.4893523680698996, "grad_norm": 6.006885768011372, "learning_rate": 5.4151033682373035e-06, "loss": 17.4608, "step": 26771 }, { "epoch": 0.48937064726634616, "grad_norm": 5.3133142659177635, "learning_rate": 5.414808377855626e-06, "loss": 17.1791, "step": 26772 }, { "epoch": 0.4893889264627927, "grad_norm": 6.612993465288934, "learning_rate": 5.414513386020084e-06, "loss": 18.1677, "step": 26773 }, { "epoch": 0.48940720565923923, "grad_norm": 6.834958480775478, "learning_rate": 5.414218392731708e-06, "loss": 17.8332, "step": 26774 }, { "epoch": 0.48942548485568577, "grad_norm": 6.685691737836291, "learning_rate": 5.413923397991532e-06, "loss": 17.0545, "step": 26775 }, { "epoch": 0.48944376405213225, "grad_norm": 6.097640583030748, "learning_rate": 5.413628401800591e-06, "loss": 17.3799, "step": 26776 }, { "epoch": 0.4894620432485788, "grad_norm": 6.762595586009303, "learning_rate": 5.413333404159917e-06, "loss": 17.6671, "step": 26777 }, { "epoch": 0.4894803224450253, "grad_norm": 5.749003583807096, "learning_rate": 5.413038405070547e-06, "loss": 17.1119, "step": 26778 }, { "epoch": 0.48949860164147185, "grad_norm": 5.646286438711893, "learning_rate": 5.412743404533512e-06, "loss": 17.493, "step": 26779 }, { "epoch": 0.4895168808379184, "grad_norm": 7.391000562632388, "learning_rate": 5.412448402549848e-06, "loss": 17.8051, "step": 26780 }, { "epoch": 0.48953516003436487, "grad_norm": 5.408967309528288, "learning_rate": 5.4121533991205875e-06, "loss": 17.2397, "step": 26781 }, { "epoch": 0.4895534392308114, "grad_norm": 6.601831321266045, "learning_rate": 5.411858394246765e-06, "loss": 17.4203, "step": 26782 }, { "epoch": 0.48957171842725794, "grad_norm": 5.480369766172453, "learning_rate": 5.411563387929415e-06, "loss": 17.2981, "step": 26783 }, { "epoch": 0.4895899976237045, "grad_norm": 6.8605239151113695, "learning_rate": 5.411268380169572e-06, "loss": 17.771, "step": 26784 }, { "epoch": 0.489608276820151, "grad_norm": 6.873494468844328, "learning_rate": 5.410973370968268e-06, "loss": 17.6408, "step": 26785 }, { "epoch": 0.4896265560165975, "grad_norm": 5.889293413290619, "learning_rate": 5.410678360326537e-06, "loss": 17.2507, "step": 26786 }, { "epoch": 0.489644835213044, "grad_norm": 5.892544846947069, "learning_rate": 5.410383348245416e-06, "loss": 17.0728, "step": 26787 }, { "epoch": 0.48966311440949056, "grad_norm": 7.835531562358622, "learning_rate": 5.4100883347259355e-06, "loss": 18.115, "step": 26788 }, { "epoch": 0.4896813936059371, "grad_norm": 6.599834990120285, "learning_rate": 5.4097933197691325e-06, "loss": 17.5977, "step": 26789 }, { "epoch": 0.48969967280238363, "grad_norm": 5.751483476441311, "learning_rate": 5.409498303376038e-06, "loss": 17.186, "step": 26790 }, { "epoch": 0.4897179519988301, "grad_norm": 5.943528801227008, "learning_rate": 5.409203285547687e-06, "loss": 17.4003, "step": 26791 }, { "epoch": 0.48973623119527665, "grad_norm": 6.631362999406427, "learning_rate": 5.408908266285116e-06, "loss": 17.3434, "step": 26792 }, { "epoch": 0.4897545103917232, "grad_norm": 8.261633001616998, "learning_rate": 5.408613245589354e-06, "loss": 18.0013, "step": 26793 }, { "epoch": 0.4897727895881697, "grad_norm": 9.316514163925088, "learning_rate": 5.408318223461441e-06, "loss": 18.3173, "step": 26794 }, { "epoch": 0.4897910687846162, "grad_norm": 5.632237732045882, "learning_rate": 5.408023199902407e-06, "loss": 17.2808, "step": 26795 }, { "epoch": 0.48980934798106274, "grad_norm": 7.090079701011441, "learning_rate": 5.407728174913287e-06, "loss": 17.7669, "step": 26796 }, { "epoch": 0.4898276271775093, "grad_norm": 7.082274922460304, "learning_rate": 5.407433148495115e-06, "loss": 17.6108, "step": 26797 }, { "epoch": 0.4898459063739558, "grad_norm": 5.921376319212949, "learning_rate": 5.407138120648926e-06, "loss": 17.4497, "step": 26798 }, { "epoch": 0.48986418557040234, "grad_norm": 5.088669060802675, "learning_rate": 5.406843091375752e-06, "loss": 17.1594, "step": 26799 }, { "epoch": 0.4898824647668488, "grad_norm": 9.112998331090383, "learning_rate": 5.406548060676629e-06, "loss": 18.656, "step": 26800 }, { "epoch": 0.48990074396329536, "grad_norm": 6.64795311345859, "learning_rate": 5.40625302855259e-06, "loss": 17.4863, "step": 26801 }, { "epoch": 0.4899190231597419, "grad_norm": 6.893498065757439, "learning_rate": 5.40595799500467e-06, "loss": 17.5541, "step": 26802 }, { "epoch": 0.48993730235618843, "grad_norm": 8.16370549613025, "learning_rate": 5.405662960033902e-06, "loss": 18.6281, "step": 26803 }, { "epoch": 0.48995558155263497, "grad_norm": 6.470659436684596, "learning_rate": 5.405367923641319e-06, "loss": 17.524, "step": 26804 }, { "epoch": 0.48997386074908145, "grad_norm": 6.170261979075298, "learning_rate": 5.40507288582796e-06, "loss": 17.555, "step": 26805 }, { "epoch": 0.489992139945528, "grad_norm": 4.9576388587498315, "learning_rate": 5.404777846594853e-06, "loss": 16.8701, "step": 26806 }, { "epoch": 0.4900104191419745, "grad_norm": 6.438231315488346, "learning_rate": 5.4044828059430355e-06, "loss": 17.6076, "step": 26807 }, { "epoch": 0.49002869833842105, "grad_norm": 7.5781883995260255, "learning_rate": 5.4041877638735405e-06, "loss": 17.563, "step": 26808 }, { "epoch": 0.4900469775348676, "grad_norm": 6.578265249848708, "learning_rate": 5.403892720387404e-06, "loss": 17.3995, "step": 26809 }, { "epoch": 0.49006525673131407, "grad_norm": 5.285911452760043, "learning_rate": 5.403597675485657e-06, "loss": 17.0243, "step": 26810 }, { "epoch": 0.4900835359277606, "grad_norm": 5.795312727202839, "learning_rate": 5.403302629169336e-06, "loss": 17.2022, "step": 26811 }, { "epoch": 0.49010181512420714, "grad_norm": 9.374189870011783, "learning_rate": 5.403007581439475e-06, "loss": 17.405, "step": 26812 }, { "epoch": 0.4901200943206537, "grad_norm": 7.492876513342417, "learning_rate": 5.4027125322971045e-06, "loss": 18.029, "step": 26813 }, { "epoch": 0.4901383735171002, "grad_norm": 5.4262779633071165, "learning_rate": 5.4024174817432624e-06, "loss": 17.2035, "step": 26814 }, { "epoch": 0.4901566527135467, "grad_norm": 6.46063539353455, "learning_rate": 5.402122429778985e-06, "loss": 17.7318, "step": 26815 }, { "epoch": 0.49017493190999323, "grad_norm": 6.131022203123187, "learning_rate": 5.401827376405301e-06, "loss": 17.5596, "step": 26816 }, { "epoch": 0.49019321110643976, "grad_norm": 6.43573483444271, "learning_rate": 5.401532321623247e-06, "loss": 17.4997, "step": 26817 }, { "epoch": 0.4902114903028863, "grad_norm": 6.267315419116112, "learning_rate": 5.401237265433857e-06, "loss": 17.2701, "step": 26818 }, { "epoch": 0.49022976949933283, "grad_norm": 8.349651440043182, "learning_rate": 5.400942207838166e-06, "loss": 18.0184, "step": 26819 }, { "epoch": 0.4902480486957793, "grad_norm": 5.849351097855939, "learning_rate": 5.400647148837207e-06, "loss": 17.2519, "step": 26820 }, { "epoch": 0.49026632789222585, "grad_norm": 5.557082938829403, "learning_rate": 5.400352088432014e-06, "loss": 17.2449, "step": 26821 }, { "epoch": 0.4902846070886724, "grad_norm": 7.659280914258292, "learning_rate": 5.400057026623622e-06, "loss": 17.6655, "step": 26822 }, { "epoch": 0.4903028862851189, "grad_norm": 6.293562110702888, "learning_rate": 5.399761963413065e-06, "loss": 17.3693, "step": 26823 }, { "epoch": 0.49032116548156546, "grad_norm": 5.320283229261041, "learning_rate": 5.399466898801377e-06, "loss": 17.1344, "step": 26824 }, { "epoch": 0.49033944467801194, "grad_norm": 5.3788484838259, "learning_rate": 5.3991718327895925e-06, "loss": 17.0159, "step": 26825 }, { "epoch": 0.4903577238744585, "grad_norm": 7.665377172028155, "learning_rate": 5.398876765378744e-06, "loss": 18.0574, "step": 26826 }, { "epoch": 0.490376003070905, "grad_norm": 6.049932660681751, "learning_rate": 5.398581696569868e-06, "loss": 17.2714, "step": 26827 }, { "epoch": 0.49039428226735154, "grad_norm": 6.2361116173369044, "learning_rate": 5.398286626363996e-06, "loss": 17.4638, "step": 26828 }, { "epoch": 0.490412561463798, "grad_norm": 6.112522394228724, "learning_rate": 5.397991554762167e-06, "loss": 17.3001, "step": 26829 }, { "epoch": 0.49043084066024456, "grad_norm": 5.640500088782503, "learning_rate": 5.3976964817654106e-06, "loss": 17.0323, "step": 26830 }, { "epoch": 0.4904491198566911, "grad_norm": 7.223058265261146, "learning_rate": 5.397401407374763e-06, "loss": 17.7531, "step": 26831 }, { "epoch": 0.49046739905313763, "grad_norm": 10.137905839670566, "learning_rate": 5.3971063315912575e-06, "loss": 17.808, "step": 26832 }, { "epoch": 0.49048567824958417, "grad_norm": 5.570050306643148, "learning_rate": 5.396811254415929e-06, "loss": 17.0955, "step": 26833 }, { "epoch": 0.49050395744603065, "grad_norm": 6.471424733456603, "learning_rate": 5.396516175849812e-06, "loss": 17.4727, "step": 26834 }, { "epoch": 0.4905222366424772, "grad_norm": 7.28869555481653, "learning_rate": 5.396221095893938e-06, "loss": 18.055, "step": 26835 }, { "epoch": 0.4905405158389237, "grad_norm": 6.113599979603523, "learning_rate": 5.395926014549347e-06, "loss": 17.2481, "step": 26836 }, { "epoch": 0.49055879503537025, "grad_norm": 6.477612647925058, "learning_rate": 5.395630931817066e-06, "loss": 17.579, "step": 26837 }, { "epoch": 0.4905770742318168, "grad_norm": 5.570082626655907, "learning_rate": 5.3953358476981355e-06, "loss": 17.0483, "step": 26838 }, { "epoch": 0.49059535342826327, "grad_norm": 6.5939035359793685, "learning_rate": 5.395040762193587e-06, "loss": 17.5565, "step": 26839 }, { "epoch": 0.4906136326247098, "grad_norm": 5.889521432332619, "learning_rate": 5.394745675304453e-06, "loss": 17.3782, "step": 26840 }, { "epoch": 0.49063191182115634, "grad_norm": 5.810556861786461, "learning_rate": 5.394450587031771e-06, "loss": 17.1271, "step": 26841 }, { "epoch": 0.4906501910176029, "grad_norm": 7.603444045410603, "learning_rate": 5.3941554973765745e-06, "loss": 18.1476, "step": 26842 }, { "epoch": 0.4906684702140494, "grad_norm": 8.524801879786166, "learning_rate": 5.393860406339897e-06, "loss": 18.5666, "step": 26843 }, { "epoch": 0.4906867494104959, "grad_norm": 6.091587868190396, "learning_rate": 5.3935653139227714e-06, "loss": 17.3453, "step": 26844 }, { "epoch": 0.49070502860694243, "grad_norm": 5.373096735902344, "learning_rate": 5.393270220126235e-06, "loss": 17.1939, "step": 26845 }, { "epoch": 0.49072330780338896, "grad_norm": 6.959521133381509, "learning_rate": 5.39297512495132e-06, "loss": 17.8511, "step": 26846 }, { "epoch": 0.4907415869998355, "grad_norm": 5.851506965114323, "learning_rate": 5.392680028399062e-06, "loss": 17.2155, "step": 26847 }, { "epoch": 0.49075986619628204, "grad_norm": 7.532761371224967, "learning_rate": 5.392384930470493e-06, "loss": 17.6946, "step": 26848 }, { "epoch": 0.4907781453927285, "grad_norm": 6.0838953034197365, "learning_rate": 5.39208983116665e-06, "loss": 17.4837, "step": 26849 }, { "epoch": 0.49079642458917505, "grad_norm": 7.050171855548302, "learning_rate": 5.391794730488567e-06, "loss": 18.0847, "step": 26850 }, { "epoch": 0.4908147037856216, "grad_norm": 5.740280096755192, "learning_rate": 5.3914996284372756e-06, "loss": 17.4159, "step": 26851 }, { "epoch": 0.4908329829820681, "grad_norm": 7.279768457602669, "learning_rate": 5.391204525013814e-06, "loss": 17.7201, "step": 26852 }, { "epoch": 0.49085126217851466, "grad_norm": 6.894222463107142, "learning_rate": 5.390909420219213e-06, "loss": 17.1891, "step": 26853 }, { "epoch": 0.49086954137496114, "grad_norm": 5.797836351655732, "learning_rate": 5.390614314054509e-06, "loss": 17.4896, "step": 26854 }, { "epoch": 0.4908878205714077, "grad_norm": 6.95220083501087, "learning_rate": 5.390319206520734e-06, "loss": 17.8335, "step": 26855 }, { "epoch": 0.4909060997678542, "grad_norm": 6.383801639281826, "learning_rate": 5.3900240976189275e-06, "loss": 17.5907, "step": 26856 }, { "epoch": 0.49092437896430074, "grad_norm": 6.0585700664970075, "learning_rate": 5.389728987350118e-06, "loss": 17.3967, "step": 26857 }, { "epoch": 0.4909426581607473, "grad_norm": 5.4990854133711515, "learning_rate": 5.389433875715343e-06, "loss": 17.2939, "step": 26858 }, { "epoch": 0.49096093735719376, "grad_norm": 6.031399634538206, "learning_rate": 5.389138762715634e-06, "loss": 17.4824, "step": 26859 }, { "epoch": 0.4909792165536403, "grad_norm": 5.960467649815992, "learning_rate": 5.38884364835203e-06, "loss": 17.5735, "step": 26860 }, { "epoch": 0.49099749575008683, "grad_norm": 6.363918885303285, "learning_rate": 5.388548532625562e-06, "loss": 17.4231, "step": 26861 }, { "epoch": 0.49101577494653337, "grad_norm": 6.799854083715129, "learning_rate": 5.388253415537266e-06, "loss": 17.46, "step": 26862 }, { "epoch": 0.49103405414297985, "grad_norm": 5.419690651476267, "learning_rate": 5.387958297088174e-06, "loss": 17.1368, "step": 26863 }, { "epoch": 0.4910523333394264, "grad_norm": 5.6065718145011365, "learning_rate": 5.387663177279322e-06, "loss": 17.1557, "step": 26864 }, { "epoch": 0.4910706125358729, "grad_norm": 7.392491276085907, "learning_rate": 5.3873680561117444e-06, "loss": 18.1115, "step": 26865 }, { "epoch": 0.49108889173231945, "grad_norm": 5.540634482165595, "learning_rate": 5.387072933586476e-06, "loss": 17.0831, "step": 26866 }, { "epoch": 0.491107170928766, "grad_norm": 6.49316476778211, "learning_rate": 5.386777809704549e-06, "loss": 17.6583, "step": 26867 }, { "epoch": 0.49112545012521247, "grad_norm": 5.272607120352341, "learning_rate": 5.386482684467e-06, "loss": 17.3372, "step": 26868 }, { "epoch": 0.491143729321659, "grad_norm": 7.280203647564744, "learning_rate": 5.386187557874864e-06, "loss": 17.0905, "step": 26869 }, { "epoch": 0.49116200851810554, "grad_norm": 6.901638087373839, "learning_rate": 5.385892429929174e-06, "loss": 17.4576, "step": 26870 }, { "epoch": 0.4911802877145521, "grad_norm": 6.4241817384312006, "learning_rate": 5.385597300630964e-06, "loss": 17.5416, "step": 26871 }, { "epoch": 0.4911985669109986, "grad_norm": 7.21240344430058, "learning_rate": 5.385302169981267e-06, "loss": 18.199, "step": 26872 }, { "epoch": 0.4912168461074451, "grad_norm": 6.461777851863109, "learning_rate": 5.385007037981122e-06, "loss": 17.4993, "step": 26873 }, { "epoch": 0.49123512530389163, "grad_norm": 6.32818213034626, "learning_rate": 5.38471190463156e-06, "loss": 17.6206, "step": 26874 }, { "epoch": 0.49125340450033816, "grad_norm": 6.541881916707459, "learning_rate": 5.384416769933616e-06, "loss": 17.6468, "step": 26875 }, { "epoch": 0.4912716836967847, "grad_norm": 5.788408433951197, "learning_rate": 5.384121633888324e-06, "loss": 17.363, "step": 26876 }, { "epoch": 0.49128996289323124, "grad_norm": 7.48705751623344, "learning_rate": 5.38382649649672e-06, "loss": 17.7163, "step": 26877 }, { "epoch": 0.4913082420896777, "grad_norm": 6.054534722697527, "learning_rate": 5.383531357759837e-06, "loss": 17.2928, "step": 26878 }, { "epoch": 0.49132652128612425, "grad_norm": 5.686697007101629, "learning_rate": 5.383236217678709e-06, "loss": 16.9443, "step": 26879 }, { "epoch": 0.4913448004825708, "grad_norm": 5.154853867221452, "learning_rate": 5.382941076254372e-06, "loss": 17.0009, "step": 26880 }, { "epoch": 0.4913630796790173, "grad_norm": 9.232364566283492, "learning_rate": 5.38264593348786e-06, "loss": 18.3791, "step": 26881 }, { "epoch": 0.49138135887546386, "grad_norm": 5.6116698874649895, "learning_rate": 5.382350789380207e-06, "loss": 17.5436, "step": 26882 }, { "epoch": 0.49139963807191034, "grad_norm": 5.797979642885264, "learning_rate": 5.3820556439324476e-06, "loss": 17.2975, "step": 26883 }, { "epoch": 0.4914179172683569, "grad_norm": 6.644032793815194, "learning_rate": 5.381760497145617e-06, "loss": 17.4729, "step": 26884 }, { "epoch": 0.4914361964648034, "grad_norm": 6.307802392799059, "learning_rate": 5.381465349020749e-06, "loss": 17.5992, "step": 26885 }, { "epoch": 0.49145447566124995, "grad_norm": 6.2488141686992265, "learning_rate": 5.381170199558876e-06, "loss": 17.6261, "step": 26886 }, { "epoch": 0.4914727548576965, "grad_norm": 6.195354067715704, "learning_rate": 5.380875048761038e-06, "loss": 17.6299, "step": 26887 }, { "epoch": 0.49149103405414296, "grad_norm": 7.809291432483063, "learning_rate": 5.380579896628263e-06, "loss": 18.0773, "step": 26888 }, { "epoch": 0.4915093132505895, "grad_norm": 7.261728012854263, "learning_rate": 5.38028474316159e-06, "loss": 18.1113, "step": 26889 }, { "epoch": 0.49152759244703603, "grad_norm": 6.174152541252177, "learning_rate": 5.379989588362052e-06, "loss": 17.3604, "step": 26890 }, { "epoch": 0.49154587164348257, "grad_norm": 7.053717253465543, "learning_rate": 5.379694432230682e-06, "loss": 17.4238, "step": 26891 }, { "epoch": 0.4915641508399291, "grad_norm": 6.163030691150242, "learning_rate": 5.379399274768517e-06, "loss": 17.7023, "step": 26892 }, { "epoch": 0.4915824300363756, "grad_norm": 7.085142398701097, "learning_rate": 5.37910411597659e-06, "loss": 18.0284, "step": 26893 }, { "epoch": 0.4916007092328221, "grad_norm": 7.466436844686998, "learning_rate": 5.378808955855937e-06, "loss": 18.1061, "step": 26894 }, { "epoch": 0.49161898842926866, "grad_norm": 6.054243917307078, "learning_rate": 5.378513794407591e-06, "loss": 17.3603, "step": 26895 }, { "epoch": 0.4916372676257152, "grad_norm": 6.703002079725329, "learning_rate": 5.378218631632585e-06, "loss": 17.7708, "step": 26896 }, { "epoch": 0.49165554682216167, "grad_norm": 7.644869572406129, "learning_rate": 5.377923467531958e-06, "loss": 18.1343, "step": 26897 }, { "epoch": 0.4916738260186082, "grad_norm": 6.135790011801075, "learning_rate": 5.3776283021067435e-06, "loss": 17.4194, "step": 26898 }, { "epoch": 0.49169210521505474, "grad_norm": 5.248274801099542, "learning_rate": 5.37733313535797e-06, "loss": 16.8469, "step": 26899 }, { "epoch": 0.4917103844115013, "grad_norm": 5.1752921146963855, "learning_rate": 5.3770379672866805e-06, "loss": 16.9588, "step": 26900 }, { "epoch": 0.4917286636079478, "grad_norm": 6.878734534908685, "learning_rate": 5.376742797893905e-06, "loss": 17.795, "step": 26901 }, { "epoch": 0.4917469428043943, "grad_norm": 6.765103844627631, "learning_rate": 5.376447627180677e-06, "loss": 17.5713, "step": 26902 }, { "epoch": 0.49176522200084083, "grad_norm": 7.373925014002523, "learning_rate": 5.376152455148034e-06, "loss": 17.9319, "step": 26903 }, { "epoch": 0.49178350119728736, "grad_norm": 6.974871017734401, "learning_rate": 5.375857281797008e-06, "loss": 17.7886, "step": 26904 }, { "epoch": 0.4918017803937339, "grad_norm": 7.047464959961935, "learning_rate": 5.375562107128636e-06, "loss": 17.8694, "step": 26905 }, { "epoch": 0.49182005959018044, "grad_norm": 6.203225308572396, "learning_rate": 5.375266931143951e-06, "loss": 17.5135, "step": 26906 }, { "epoch": 0.4918383387866269, "grad_norm": 5.6146931985286495, "learning_rate": 5.374971753843988e-06, "loss": 17.0993, "step": 26907 }, { "epoch": 0.49185661798307345, "grad_norm": 5.788173869878248, "learning_rate": 5.374676575229782e-06, "loss": 17.3109, "step": 26908 }, { "epoch": 0.49187489717952, "grad_norm": 6.580879383156743, "learning_rate": 5.374381395302367e-06, "loss": 17.4365, "step": 26909 }, { "epoch": 0.4918931763759665, "grad_norm": 5.662605690686466, "learning_rate": 5.374086214062777e-06, "loss": 17.2884, "step": 26910 }, { "epoch": 0.49191145557241306, "grad_norm": 7.698035315986826, "learning_rate": 5.373791031512048e-06, "loss": 18.2263, "step": 26911 }, { "epoch": 0.49192973476885954, "grad_norm": 6.156024129877999, "learning_rate": 5.373495847651214e-06, "loss": 17.47, "step": 26912 }, { "epoch": 0.4919480139653061, "grad_norm": 6.802171153383905, "learning_rate": 5.373200662481308e-06, "loss": 17.6954, "step": 26913 }, { "epoch": 0.4919662931617526, "grad_norm": 6.636026493710453, "learning_rate": 5.372905476003368e-06, "loss": 17.6593, "step": 26914 }, { "epoch": 0.49198457235819915, "grad_norm": 6.484582377149246, "learning_rate": 5.372610288218426e-06, "loss": 17.4478, "step": 26915 }, { "epoch": 0.4920028515546457, "grad_norm": 6.15595223056846, "learning_rate": 5.3723150991275175e-06, "loss": 17.363, "step": 26916 }, { "epoch": 0.49202113075109216, "grad_norm": 7.695587024505982, "learning_rate": 5.372019908731676e-06, "loss": 18.4037, "step": 26917 }, { "epoch": 0.4920394099475387, "grad_norm": 6.798574774437424, "learning_rate": 5.371724717031938e-06, "loss": 17.4892, "step": 26918 }, { "epoch": 0.49205768914398523, "grad_norm": 6.815633024814039, "learning_rate": 5.371429524029337e-06, "loss": 17.726, "step": 26919 }, { "epoch": 0.49207596834043177, "grad_norm": 6.759571646412667, "learning_rate": 5.3711343297249065e-06, "loss": 17.4205, "step": 26920 }, { "epoch": 0.4920942475368783, "grad_norm": 7.2400862585694945, "learning_rate": 5.370839134119683e-06, "loss": 17.8043, "step": 26921 }, { "epoch": 0.4921125267333248, "grad_norm": 7.084659912512047, "learning_rate": 5.370543937214701e-06, "loss": 17.9527, "step": 26922 }, { "epoch": 0.4921308059297713, "grad_norm": 6.862495073162513, "learning_rate": 5.370248739010995e-06, "loss": 17.8831, "step": 26923 }, { "epoch": 0.49214908512621786, "grad_norm": 5.968072786265508, "learning_rate": 5.369953539509598e-06, "loss": 17.5994, "step": 26924 }, { "epoch": 0.4921673643226644, "grad_norm": 6.376872240610247, "learning_rate": 5.369658338711547e-06, "loss": 17.3389, "step": 26925 }, { "epoch": 0.4921856435191109, "grad_norm": 6.258884787702053, "learning_rate": 5.369363136617875e-06, "loss": 17.4756, "step": 26926 }, { "epoch": 0.4922039227155574, "grad_norm": 5.857006205686026, "learning_rate": 5.369067933229617e-06, "loss": 17.5055, "step": 26927 }, { "epoch": 0.49222220191200394, "grad_norm": 6.134324532429734, "learning_rate": 5.368772728547809e-06, "loss": 17.4956, "step": 26928 }, { "epoch": 0.4922404811084505, "grad_norm": 6.484942194754365, "learning_rate": 5.368477522573484e-06, "loss": 17.6194, "step": 26929 }, { "epoch": 0.492258760304897, "grad_norm": 5.237759670925472, "learning_rate": 5.368182315307677e-06, "loss": 16.8716, "step": 26930 }, { "epoch": 0.4922770395013435, "grad_norm": 6.164913411336027, "learning_rate": 5.367887106751423e-06, "loss": 17.584, "step": 26931 }, { "epoch": 0.49229531869779003, "grad_norm": 4.863338566559338, "learning_rate": 5.367591896905757e-06, "loss": 16.9111, "step": 26932 }, { "epoch": 0.49231359789423657, "grad_norm": 6.055456709587119, "learning_rate": 5.367296685771713e-06, "loss": 17.3858, "step": 26933 }, { "epoch": 0.4923318770906831, "grad_norm": 4.91780885037892, "learning_rate": 5.367001473350327e-06, "loss": 16.8793, "step": 26934 }, { "epoch": 0.49235015628712964, "grad_norm": 7.4618920387291565, "learning_rate": 5.366706259642631e-06, "loss": 17.7985, "step": 26935 }, { "epoch": 0.4923684354835761, "grad_norm": 6.8229652616455425, "learning_rate": 5.366411044649663e-06, "loss": 17.5554, "step": 26936 }, { "epoch": 0.49238671468002265, "grad_norm": 6.244515700441109, "learning_rate": 5.366115828372454e-06, "loss": 17.2898, "step": 26937 }, { "epoch": 0.4924049938764692, "grad_norm": 5.985505017798252, "learning_rate": 5.365820610812042e-06, "loss": 17.4043, "step": 26938 }, { "epoch": 0.4924232730729157, "grad_norm": 5.535320221329392, "learning_rate": 5.365525391969463e-06, "loss": 17.3785, "step": 26939 }, { "epoch": 0.49244155226936226, "grad_norm": 7.899752924980689, "learning_rate": 5.365230171845746e-06, "loss": 18.0108, "step": 26940 }, { "epoch": 0.49245983146580874, "grad_norm": 6.514524779167289, "learning_rate": 5.364934950441929e-06, "loss": 17.4425, "step": 26941 }, { "epoch": 0.4924781106622553, "grad_norm": 6.0603258145956, "learning_rate": 5.364639727759047e-06, "loss": 17.1233, "step": 26942 }, { "epoch": 0.4924963898587018, "grad_norm": 6.512603450804955, "learning_rate": 5.364344503798136e-06, "loss": 17.5404, "step": 26943 }, { "epoch": 0.49251466905514835, "grad_norm": 8.761079644741475, "learning_rate": 5.3640492785602285e-06, "loss": 17.5778, "step": 26944 }, { "epoch": 0.4925329482515949, "grad_norm": 6.417406889031707, "learning_rate": 5.363754052046357e-06, "loss": 17.3514, "step": 26945 }, { "epoch": 0.49255122744804136, "grad_norm": 5.894893632438456, "learning_rate": 5.3634588242575625e-06, "loss": 17.3609, "step": 26946 }, { "epoch": 0.4925695066444879, "grad_norm": 6.449317455126374, "learning_rate": 5.363163595194876e-06, "loss": 17.5889, "step": 26947 }, { "epoch": 0.49258778584093443, "grad_norm": 7.35069890110503, "learning_rate": 5.362868364859332e-06, "loss": 17.8098, "step": 26948 }, { "epoch": 0.49260606503738097, "grad_norm": 7.126230057637257, "learning_rate": 5.362573133251965e-06, "loss": 17.7459, "step": 26949 }, { "epoch": 0.4926243442338275, "grad_norm": 6.235345778518111, "learning_rate": 5.362277900373811e-06, "loss": 17.3931, "step": 26950 }, { "epoch": 0.492642623430274, "grad_norm": 6.933436583356739, "learning_rate": 5.361982666225905e-06, "loss": 17.5829, "step": 26951 }, { "epoch": 0.4926609026267205, "grad_norm": 7.389122953462225, "learning_rate": 5.361687430809282e-06, "loss": 18.1202, "step": 26952 }, { "epoch": 0.49267918182316706, "grad_norm": 7.1692490912883144, "learning_rate": 5.361392194124973e-06, "loss": 17.9091, "step": 26953 }, { "epoch": 0.4926974610196136, "grad_norm": 8.591531879847942, "learning_rate": 5.361096956174018e-06, "loss": 17.9691, "step": 26954 }, { "epoch": 0.4927157402160601, "grad_norm": 6.527397017102309, "learning_rate": 5.360801716957449e-06, "loss": 17.7767, "step": 26955 }, { "epoch": 0.4927340194125066, "grad_norm": 6.639954028183422, "learning_rate": 5.360506476476303e-06, "loss": 17.6142, "step": 26956 }, { "epoch": 0.49275229860895314, "grad_norm": 6.465569336463757, "learning_rate": 5.360211234731612e-06, "loss": 17.6567, "step": 26957 }, { "epoch": 0.4927705778053997, "grad_norm": 7.161281066801649, "learning_rate": 5.359915991724409e-06, "loss": 18.3439, "step": 26958 }, { "epoch": 0.4927888570018462, "grad_norm": 5.810592162886852, "learning_rate": 5.359620747455734e-06, "loss": 17.2661, "step": 26959 }, { "epoch": 0.49280713619829275, "grad_norm": 6.639986391388215, "learning_rate": 5.359325501926621e-06, "loss": 17.945, "step": 26960 }, { "epoch": 0.49282541539473923, "grad_norm": 5.685433823811523, "learning_rate": 5.359030255138101e-06, "loss": 17.2936, "step": 26961 }, { "epoch": 0.49284369459118577, "grad_norm": 5.729852340163938, "learning_rate": 5.358735007091212e-06, "loss": 17.2307, "step": 26962 }, { "epoch": 0.4928619737876323, "grad_norm": 6.187144348372903, "learning_rate": 5.358439757786989e-06, "loss": 17.2237, "step": 26963 }, { "epoch": 0.49288025298407884, "grad_norm": 4.332550610332634, "learning_rate": 5.358144507226464e-06, "loss": 16.6847, "step": 26964 }, { "epoch": 0.4928985321805253, "grad_norm": 4.863894359925461, "learning_rate": 5.357849255410674e-06, "loss": 16.7782, "step": 26965 }, { "epoch": 0.49291681137697185, "grad_norm": 6.182878907863092, "learning_rate": 5.357554002340654e-06, "loss": 17.2337, "step": 26966 }, { "epoch": 0.4929350905734184, "grad_norm": 5.526272701074921, "learning_rate": 5.357258748017437e-06, "loss": 17.3214, "step": 26967 }, { "epoch": 0.4929533697698649, "grad_norm": 7.613455418716324, "learning_rate": 5.3569634924420595e-06, "loss": 18.3149, "step": 26968 }, { "epoch": 0.49297164896631146, "grad_norm": 6.67763283935045, "learning_rate": 5.356668235615557e-06, "loss": 17.8365, "step": 26969 }, { "epoch": 0.49298992816275794, "grad_norm": 5.913927222305072, "learning_rate": 5.356372977538964e-06, "loss": 17.6074, "step": 26970 }, { "epoch": 0.4930082073592045, "grad_norm": 8.135030482059443, "learning_rate": 5.356077718213313e-06, "loss": 18.0704, "step": 26971 }, { "epoch": 0.493026486555651, "grad_norm": 5.8973378186105725, "learning_rate": 5.35578245763964e-06, "loss": 17.1145, "step": 26972 }, { "epoch": 0.49304476575209755, "grad_norm": 14.303282257554455, "learning_rate": 5.35548719581898e-06, "loss": 17.8257, "step": 26973 }, { "epoch": 0.4930630449485441, "grad_norm": 5.7261448376667765, "learning_rate": 5.35519193275237e-06, "loss": 17.0845, "step": 26974 }, { "epoch": 0.49308132414499056, "grad_norm": 5.512857713231104, "learning_rate": 5.354896668440843e-06, "loss": 17.2451, "step": 26975 }, { "epoch": 0.4930996033414371, "grad_norm": 5.209464948503143, "learning_rate": 5.354601402885434e-06, "loss": 16.8178, "step": 26976 }, { "epoch": 0.49311788253788363, "grad_norm": 6.831800009675839, "learning_rate": 5.3543061360871774e-06, "loss": 17.8316, "step": 26977 }, { "epoch": 0.49313616173433017, "grad_norm": 6.310170617315658, "learning_rate": 5.3540108680471085e-06, "loss": 17.6085, "step": 26978 }, { "epoch": 0.4931544409307767, "grad_norm": 7.249291216101069, "learning_rate": 5.353715598766262e-06, "loss": 18.0408, "step": 26979 }, { "epoch": 0.4931727201272232, "grad_norm": 5.835547523451604, "learning_rate": 5.353420328245673e-06, "loss": 17.244, "step": 26980 }, { "epoch": 0.4931909993236697, "grad_norm": 6.747052172227247, "learning_rate": 5.353125056486377e-06, "loss": 17.5503, "step": 26981 }, { "epoch": 0.49320927852011626, "grad_norm": 5.787707208088663, "learning_rate": 5.352829783489407e-06, "loss": 17.3794, "step": 26982 }, { "epoch": 0.4932275577165628, "grad_norm": 7.077291862170868, "learning_rate": 5.3525345092558e-06, "loss": 17.6171, "step": 26983 }, { "epoch": 0.49324583691300933, "grad_norm": 8.164761075820138, "learning_rate": 5.352239233786592e-06, "loss": 17.6073, "step": 26984 }, { "epoch": 0.4932641161094558, "grad_norm": 7.161564110869427, "learning_rate": 5.351943957082813e-06, "loss": 17.7869, "step": 26985 }, { "epoch": 0.49328239530590234, "grad_norm": 7.7619675751750625, "learning_rate": 5.351648679145502e-06, "loss": 18.0181, "step": 26986 }, { "epoch": 0.4933006745023489, "grad_norm": 5.668140917595582, "learning_rate": 5.351353399975694e-06, "loss": 17.3054, "step": 26987 }, { "epoch": 0.4933189536987954, "grad_norm": 6.3856548032983405, "learning_rate": 5.351058119574423e-06, "loss": 17.4187, "step": 26988 }, { "epoch": 0.49333723289524195, "grad_norm": 5.526415424467501, "learning_rate": 5.3507628379427225e-06, "loss": 17.179, "step": 26989 }, { "epoch": 0.49335551209168843, "grad_norm": 6.346497051201486, "learning_rate": 5.3504675550816286e-06, "loss": 17.575, "step": 26990 }, { "epoch": 0.49337379128813497, "grad_norm": 6.980587862419724, "learning_rate": 5.350172270992177e-06, "loss": 17.946, "step": 26991 }, { "epoch": 0.4933920704845815, "grad_norm": 7.803761180025331, "learning_rate": 5.3498769856754e-06, "loss": 17.7162, "step": 26992 }, { "epoch": 0.49341034968102804, "grad_norm": 6.661552136135507, "learning_rate": 5.349581699132337e-06, "loss": 17.7463, "step": 26993 }, { "epoch": 0.4934286288774746, "grad_norm": 5.6460867041461125, "learning_rate": 5.34928641136402e-06, "loss": 17.1434, "step": 26994 }, { "epoch": 0.49344690807392105, "grad_norm": 6.079734936911856, "learning_rate": 5.348991122371484e-06, "loss": 17.5019, "step": 26995 }, { "epoch": 0.4934651872703676, "grad_norm": 5.589034062581712, "learning_rate": 5.3486958321557645e-06, "loss": 17.1942, "step": 26996 }, { "epoch": 0.4934834664668141, "grad_norm": 6.288894942471698, "learning_rate": 5.348400540717896e-06, "loss": 17.3222, "step": 26997 }, { "epoch": 0.49350174566326066, "grad_norm": 5.079433526195749, "learning_rate": 5.3481052480589154e-06, "loss": 16.911, "step": 26998 }, { "epoch": 0.49352002485970714, "grad_norm": 5.801648085214462, "learning_rate": 5.347809954179853e-06, "loss": 17.2667, "step": 26999 }, { "epoch": 0.4935383040561537, "grad_norm": 6.783307260324162, "learning_rate": 5.34751465908175e-06, "loss": 17.6292, "step": 27000 }, { "epoch": 0.4935565832526002, "grad_norm": 7.3165225948765995, "learning_rate": 5.347219362765637e-06, "loss": 17.8569, "step": 27001 }, { "epoch": 0.49357486244904675, "grad_norm": 6.571744864750892, "learning_rate": 5.34692406523255e-06, "loss": 17.5595, "step": 27002 }, { "epoch": 0.4935931416454933, "grad_norm": 6.7247155088644375, "learning_rate": 5.3466287664835255e-06, "loss": 17.9137, "step": 27003 }, { "epoch": 0.49361142084193976, "grad_norm": 5.6204778914317535, "learning_rate": 5.3463334665195954e-06, "loss": 17.0609, "step": 27004 }, { "epoch": 0.4936297000383863, "grad_norm": 6.321485899260976, "learning_rate": 5.346038165341798e-06, "loss": 17.4114, "step": 27005 }, { "epoch": 0.49364797923483283, "grad_norm": 5.684925732587549, "learning_rate": 5.3457428629511665e-06, "loss": 17.2745, "step": 27006 }, { "epoch": 0.49366625843127937, "grad_norm": 6.471030352199631, "learning_rate": 5.345447559348736e-06, "loss": 17.2311, "step": 27007 }, { "epoch": 0.4936845376277259, "grad_norm": 6.386545510890274, "learning_rate": 5.345152254535542e-06, "loss": 17.4082, "step": 27008 }, { "epoch": 0.4937028168241724, "grad_norm": 6.72417967649249, "learning_rate": 5.344856948512619e-06, "loss": 17.8282, "step": 27009 }, { "epoch": 0.4937210960206189, "grad_norm": 6.224589284089164, "learning_rate": 5.344561641281003e-06, "loss": 17.2626, "step": 27010 }, { "epoch": 0.49373937521706546, "grad_norm": 6.127257620504393, "learning_rate": 5.344266332841728e-06, "loss": 17.2126, "step": 27011 }, { "epoch": 0.493757654413512, "grad_norm": 7.429251140891445, "learning_rate": 5.343971023195829e-06, "loss": 18.1963, "step": 27012 }, { "epoch": 0.49377593360995853, "grad_norm": 6.348298199733702, "learning_rate": 5.343675712344342e-06, "loss": 17.7133, "step": 27013 }, { "epoch": 0.493794212806405, "grad_norm": 6.316016192402913, "learning_rate": 5.343380400288301e-06, "loss": 17.4648, "step": 27014 }, { "epoch": 0.49381249200285154, "grad_norm": 7.458585796933602, "learning_rate": 5.343085087028742e-06, "loss": 18.0657, "step": 27015 }, { "epoch": 0.4938307711992981, "grad_norm": 6.686006564372484, "learning_rate": 5.3427897725667e-06, "loss": 17.5928, "step": 27016 }, { "epoch": 0.4938490503957446, "grad_norm": 7.845928481052563, "learning_rate": 5.342494456903208e-06, "loss": 18.0915, "step": 27017 }, { "epoch": 0.49386732959219115, "grad_norm": 7.656002154062699, "learning_rate": 5.342199140039303e-06, "loss": 17.3364, "step": 27018 }, { "epoch": 0.49388560878863763, "grad_norm": 5.391532742953372, "learning_rate": 5.3419038219760215e-06, "loss": 17.1496, "step": 27019 }, { "epoch": 0.49390388798508417, "grad_norm": 5.359108971593141, "learning_rate": 5.341608502714395e-06, "loss": 17.1226, "step": 27020 }, { "epoch": 0.4939221671815307, "grad_norm": 7.004731176418986, "learning_rate": 5.34131318225546e-06, "loss": 17.8174, "step": 27021 }, { "epoch": 0.49394044637797724, "grad_norm": 5.484664541987583, "learning_rate": 5.341017860600253e-06, "loss": 17.1402, "step": 27022 }, { "epoch": 0.4939587255744238, "grad_norm": 6.371429794989805, "learning_rate": 5.340722537749807e-06, "loss": 17.494, "step": 27023 }, { "epoch": 0.49397700477087025, "grad_norm": 7.01453052158817, "learning_rate": 5.340427213705159e-06, "loss": 17.7144, "step": 27024 }, { "epoch": 0.4939952839673168, "grad_norm": 6.078159804931989, "learning_rate": 5.340131888467344e-06, "loss": 17.478, "step": 27025 }, { "epoch": 0.4940135631637633, "grad_norm": 6.5826178827557875, "learning_rate": 5.3398365620373935e-06, "loss": 17.4035, "step": 27026 }, { "epoch": 0.49403184236020986, "grad_norm": 6.41085703611692, "learning_rate": 5.339541234416348e-06, "loss": 17.4888, "step": 27027 }, { "epoch": 0.4940501215566564, "grad_norm": 6.471313237406635, "learning_rate": 5.3392459056052385e-06, "loss": 17.6197, "step": 27028 }, { "epoch": 0.4940684007531029, "grad_norm": 6.759620568912845, "learning_rate": 5.338950575605103e-06, "loss": 17.5151, "step": 27029 }, { "epoch": 0.4940866799495494, "grad_norm": 5.726145059844037, "learning_rate": 5.338655244416974e-06, "loss": 17.1632, "step": 27030 }, { "epoch": 0.49410495914599595, "grad_norm": 7.367143100038594, "learning_rate": 5.3383599120418865e-06, "loss": 17.8241, "step": 27031 }, { "epoch": 0.4941232383424425, "grad_norm": 8.401285293444744, "learning_rate": 5.338064578480879e-06, "loss": 18.5157, "step": 27032 }, { "epoch": 0.49414151753888896, "grad_norm": 7.261683111609722, "learning_rate": 5.3377692437349835e-06, "loss": 17.8886, "step": 27033 }, { "epoch": 0.4941597967353355, "grad_norm": 6.532766534634735, "learning_rate": 5.3374739078052366e-06, "loss": 17.487, "step": 27034 }, { "epoch": 0.49417807593178203, "grad_norm": 5.855344901640332, "learning_rate": 5.3371785706926725e-06, "loss": 17.1961, "step": 27035 }, { "epoch": 0.49419635512822857, "grad_norm": 6.000774744402495, "learning_rate": 5.336883232398326e-06, "loss": 17.4634, "step": 27036 }, { "epoch": 0.4942146343246751, "grad_norm": 6.4939483382731416, "learning_rate": 5.336587892923235e-06, "loss": 17.3718, "step": 27037 }, { "epoch": 0.4942329135211216, "grad_norm": 8.90333019290219, "learning_rate": 5.336292552268432e-06, "loss": 18.2913, "step": 27038 }, { "epoch": 0.4942511927175681, "grad_norm": 5.384420330075554, "learning_rate": 5.335997210434953e-06, "loss": 17.1865, "step": 27039 }, { "epoch": 0.49426947191401466, "grad_norm": 6.191513432394301, "learning_rate": 5.33570186742383e-06, "loss": 17.3732, "step": 27040 }, { "epoch": 0.4942877511104612, "grad_norm": 6.0282047564718555, "learning_rate": 5.335406523236104e-06, "loss": 17.5049, "step": 27041 }, { "epoch": 0.49430603030690773, "grad_norm": 7.34642269136078, "learning_rate": 5.3351111778728075e-06, "loss": 17.7227, "step": 27042 }, { "epoch": 0.4943243095033542, "grad_norm": 6.347294744552038, "learning_rate": 5.334815831334975e-06, "loss": 17.4325, "step": 27043 }, { "epoch": 0.49434258869980074, "grad_norm": 5.738275868115558, "learning_rate": 5.334520483623641e-06, "loss": 17.1608, "step": 27044 }, { "epoch": 0.4943608678962473, "grad_norm": 6.983004953804565, "learning_rate": 5.334225134739841e-06, "loss": 17.4832, "step": 27045 }, { "epoch": 0.4943791470926938, "grad_norm": 5.8536804263533, "learning_rate": 5.333929784684614e-06, "loss": 17.2471, "step": 27046 }, { "epoch": 0.49439742628914035, "grad_norm": 6.337725509820828, "learning_rate": 5.333634433458989e-06, "loss": 17.3965, "step": 27047 }, { "epoch": 0.49441570548558683, "grad_norm": 5.4616529150627215, "learning_rate": 5.333339081064006e-06, "loss": 17.2323, "step": 27048 }, { "epoch": 0.49443398468203337, "grad_norm": 6.1375526015468305, "learning_rate": 5.3330437275006985e-06, "loss": 17.3927, "step": 27049 }, { "epoch": 0.4944522638784799, "grad_norm": 6.460811066663535, "learning_rate": 5.3327483727701015e-06, "loss": 17.4909, "step": 27050 }, { "epoch": 0.49447054307492644, "grad_norm": 7.0460329858769954, "learning_rate": 5.3324530168732495e-06, "loss": 17.8015, "step": 27051 }, { "epoch": 0.494488822271373, "grad_norm": 6.81539654948131, "learning_rate": 5.332157659811179e-06, "loss": 17.6895, "step": 27052 }, { "epoch": 0.49450710146781945, "grad_norm": 5.905972951436398, "learning_rate": 5.331862301584925e-06, "loss": 17.2996, "step": 27053 }, { "epoch": 0.494525380664266, "grad_norm": 6.84792720692525, "learning_rate": 5.331566942195522e-06, "loss": 17.8233, "step": 27054 }, { "epoch": 0.4945436598607125, "grad_norm": 6.546110597837149, "learning_rate": 5.331271581644005e-06, "loss": 17.1846, "step": 27055 }, { "epoch": 0.49456193905715906, "grad_norm": 6.034470999865652, "learning_rate": 5.3309762199314115e-06, "loss": 17.1926, "step": 27056 }, { "epoch": 0.4945802182536056, "grad_norm": 6.678728078849492, "learning_rate": 5.330680857058774e-06, "loss": 17.3395, "step": 27057 }, { "epoch": 0.4945984974500521, "grad_norm": 5.890798850145908, "learning_rate": 5.330385493027128e-06, "loss": 17.2562, "step": 27058 }, { "epoch": 0.4946167766464986, "grad_norm": 5.659396926385291, "learning_rate": 5.330090127837511e-06, "loss": 17.1656, "step": 27059 }, { "epoch": 0.49463505584294515, "grad_norm": 6.4122559835855, "learning_rate": 5.329794761490957e-06, "loss": 17.6316, "step": 27060 }, { "epoch": 0.4946533350393917, "grad_norm": 7.444983281994059, "learning_rate": 5.329499393988501e-06, "loss": 17.8522, "step": 27061 }, { "epoch": 0.4946716142358382, "grad_norm": 5.868080111913281, "learning_rate": 5.3292040253311774e-06, "loss": 17.0549, "step": 27062 }, { "epoch": 0.4946898934322847, "grad_norm": 6.43927321499535, "learning_rate": 5.328908655520022e-06, "loss": 17.5224, "step": 27063 }, { "epoch": 0.49470817262873124, "grad_norm": 6.275055806201355, "learning_rate": 5.32861328455607e-06, "loss": 17.5626, "step": 27064 }, { "epoch": 0.49472645182517777, "grad_norm": 5.874244766501496, "learning_rate": 5.328317912440358e-06, "loss": 17.3449, "step": 27065 }, { "epoch": 0.4947447310216243, "grad_norm": 6.002735749726299, "learning_rate": 5.328022539173919e-06, "loss": 17.4166, "step": 27066 }, { "epoch": 0.4947630102180708, "grad_norm": 7.078313016909611, "learning_rate": 5.327727164757791e-06, "loss": 17.8288, "step": 27067 }, { "epoch": 0.4947812894145173, "grad_norm": 7.211479667554601, "learning_rate": 5.3274317891930075e-06, "loss": 17.7986, "step": 27068 }, { "epoch": 0.49479956861096386, "grad_norm": 6.49780210498228, "learning_rate": 5.327136412480603e-06, "loss": 17.5079, "step": 27069 }, { "epoch": 0.4948178478074104, "grad_norm": 4.962388526502773, "learning_rate": 5.3268410346216146e-06, "loss": 17.0274, "step": 27070 }, { "epoch": 0.49483612700385693, "grad_norm": 8.588909027113303, "learning_rate": 5.326545655617077e-06, "loss": 18.572, "step": 27071 }, { "epoch": 0.4948544062003034, "grad_norm": 7.909936946394288, "learning_rate": 5.326250275468023e-06, "loss": 17.8187, "step": 27072 }, { "epoch": 0.49487268539674995, "grad_norm": 7.79164215084259, "learning_rate": 5.325954894175491e-06, "loss": 18.0844, "step": 27073 }, { "epoch": 0.4948909645931965, "grad_norm": 6.507166316716315, "learning_rate": 5.325659511740518e-06, "loss": 17.4294, "step": 27074 }, { "epoch": 0.494909243789643, "grad_norm": 7.045042200629096, "learning_rate": 5.325364128164134e-06, "loss": 17.4291, "step": 27075 }, { "epoch": 0.49492752298608955, "grad_norm": 6.5913797952025615, "learning_rate": 5.325068743447378e-06, "loss": 17.5557, "step": 27076 }, { "epoch": 0.49494580218253603, "grad_norm": 7.157139359265984, "learning_rate": 5.324773357591284e-06, "loss": 17.8728, "step": 27077 }, { "epoch": 0.49496408137898257, "grad_norm": 6.286609264768648, "learning_rate": 5.324477970596887e-06, "loss": 17.2988, "step": 27078 }, { "epoch": 0.4949823605754291, "grad_norm": 6.139125567440642, "learning_rate": 5.324182582465224e-06, "loss": 17.442, "step": 27079 }, { "epoch": 0.49500063977187564, "grad_norm": 5.286657223187777, "learning_rate": 5.323887193197328e-06, "loss": 17.0214, "step": 27080 }, { "epoch": 0.4950189189683222, "grad_norm": 6.0313338055455175, "learning_rate": 5.323591802794237e-06, "loss": 17.1108, "step": 27081 }, { "epoch": 0.49503719816476865, "grad_norm": 6.663827894606453, "learning_rate": 5.323296411256983e-06, "loss": 17.822, "step": 27082 }, { "epoch": 0.4950554773612152, "grad_norm": 5.749880403608699, "learning_rate": 5.323001018586604e-06, "loss": 17.1773, "step": 27083 }, { "epoch": 0.4950737565576617, "grad_norm": 5.834371956386388, "learning_rate": 5.322705624784136e-06, "loss": 17.2421, "step": 27084 }, { "epoch": 0.49509203575410826, "grad_norm": 6.237005739814264, "learning_rate": 5.32241022985061e-06, "loss": 17.2866, "step": 27085 }, { "epoch": 0.4951103149505548, "grad_norm": 5.823408136601172, "learning_rate": 5.322114833787066e-06, "loss": 17.2565, "step": 27086 }, { "epoch": 0.4951285941470013, "grad_norm": 7.479932089479094, "learning_rate": 5.321819436594536e-06, "loss": 17.7947, "step": 27087 }, { "epoch": 0.4951468733434478, "grad_norm": 6.271746162433983, "learning_rate": 5.321524038274058e-06, "loss": 17.3551, "step": 27088 }, { "epoch": 0.49516515253989435, "grad_norm": 5.516386190837085, "learning_rate": 5.321228638826667e-06, "loss": 17.2787, "step": 27089 }, { "epoch": 0.4951834317363409, "grad_norm": 6.51806980194292, "learning_rate": 5.320933238253394e-06, "loss": 17.2764, "step": 27090 }, { "epoch": 0.4952017109327874, "grad_norm": 6.505095705765514, "learning_rate": 5.320637836555282e-06, "loss": 17.3872, "step": 27091 }, { "epoch": 0.4952199901292339, "grad_norm": 6.008812418764353, "learning_rate": 5.32034243373336e-06, "loss": 17.4496, "step": 27092 }, { "epoch": 0.49523826932568044, "grad_norm": 6.132994346673277, "learning_rate": 5.320047029788665e-06, "loss": 17.3415, "step": 27093 }, { "epoch": 0.49525654852212697, "grad_norm": 6.76828084705253, "learning_rate": 5.319751624722235e-06, "loss": 17.7391, "step": 27094 }, { "epoch": 0.4952748277185735, "grad_norm": 4.852920399836946, "learning_rate": 5.319456218535102e-06, "loss": 16.8498, "step": 27095 }, { "epoch": 0.49529310691502004, "grad_norm": 8.37845213462456, "learning_rate": 5.3191608112283026e-06, "loss": 18.22, "step": 27096 }, { "epoch": 0.4953113861114665, "grad_norm": 5.510644493946002, "learning_rate": 5.318865402802872e-06, "loss": 17.079, "step": 27097 }, { "epoch": 0.49532966530791306, "grad_norm": 5.823363752835767, "learning_rate": 5.318569993259848e-06, "loss": 17.3107, "step": 27098 }, { "epoch": 0.4953479445043596, "grad_norm": 5.763510154128105, "learning_rate": 5.31827458260026e-06, "loss": 17.5461, "step": 27099 }, { "epoch": 0.49536622370080613, "grad_norm": 6.918820810921189, "learning_rate": 5.317979170825149e-06, "loss": 17.4873, "step": 27100 }, { "epoch": 0.4953845028972526, "grad_norm": 7.989744572692749, "learning_rate": 5.31768375793555e-06, "loss": 18.4143, "step": 27101 }, { "epoch": 0.49540278209369915, "grad_norm": 7.214879095416795, "learning_rate": 5.317388343932497e-06, "loss": 17.8851, "step": 27102 }, { "epoch": 0.4954210612901457, "grad_norm": 5.8630533633346245, "learning_rate": 5.3170929288170235e-06, "loss": 17.2316, "step": 27103 }, { "epoch": 0.4954393404865922, "grad_norm": 5.838223225066707, "learning_rate": 5.316797512590166e-06, "loss": 17.0889, "step": 27104 }, { "epoch": 0.49545761968303875, "grad_norm": 8.490065189147296, "learning_rate": 5.316502095252964e-06, "loss": 17.939, "step": 27105 }, { "epoch": 0.49547589887948523, "grad_norm": 6.947018162341639, "learning_rate": 5.316206676806448e-06, "loss": 17.8263, "step": 27106 }, { "epoch": 0.49549417807593177, "grad_norm": 6.530436899887638, "learning_rate": 5.315911257251655e-06, "loss": 17.5459, "step": 27107 }, { "epoch": 0.4955124572723783, "grad_norm": 5.139972165119779, "learning_rate": 5.31561583658962e-06, "loss": 16.8613, "step": 27108 }, { "epoch": 0.49553073646882484, "grad_norm": 5.007554461914014, "learning_rate": 5.315320414821379e-06, "loss": 16.9549, "step": 27109 }, { "epoch": 0.4955490156652714, "grad_norm": 6.531020566255451, "learning_rate": 5.3150249919479676e-06, "loss": 17.5633, "step": 27110 }, { "epoch": 0.49556729486171786, "grad_norm": 6.7268024899273655, "learning_rate": 5.314729567970421e-06, "loss": 17.2107, "step": 27111 }, { "epoch": 0.4955855740581644, "grad_norm": 5.763513281309919, "learning_rate": 5.314434142889776e-06, "loss": 17.3185, "step": 27112 }, { "epoch": 0.4956038532546109, "grad_norm": 5.735780970376321, "learning_rate": 5.314138716707063e-06, "loss": 16.6206, "step": 27113 }, { "epoch": 0.49562213245105746, "grad_norm": 6.094915498236935, "learning_rate": 5.313843289423324e-06, "loss": 17.5662, "step": 27114 }, { "epoch": 0.495640411647504, "grad_norm": 7.193393884445647, "learning_rate": 5.313547861039592e-06, "loss": 17.6936, "step": 27115 }, { "epoch": 0.4956586908439505, "grad_norm": 6.116919446868163, "learning_rate": 5.3132524315569e-06, "loss": 17.6923, "step": 27116 }, { "epoch": 0.495676970040397, "grad_norm": 6.071783068354307, "learning_rate": 5.3129570009762864e-06, "loss": 17.4997, "step": 27117 }, { "epoch": 0.49569524923684355, "grad_norm": 7.36498956045436, "learning_rate": 5.312661569298784e-06, "loss": 17.9898, "step": 27118 }, { "epoch": 0.4957135284332901, "grad_norm": 6.513731500749691, "learning_rate": 5.312366136525433e-06, "loss": 17.376, "step": 27119 }, { "epoch": 0.4957318076297366, "grad_norm": 5.943945414772952, "learning_rate": 5.312070702657264e-06, "loss": 16.9633, "step": 27120 }, { "epoch": 0.4957500868261831, "grad_norm": 7.317799352604877, "learning_rate": 5.311775267695314e-06, "loss": 17.541, "step": 27121 }, { "epoch": 0.49576836602262964, "grad_norm": 6.678806429324938, "learning_rate": 5.311479831640619e-06, "loss": 17.6554, "step": 27122 }, { "epoch": 0.49578664521907617, "grad_norm": 6.422061144880449, "learning_rate": 5.311184394494214e-06, "loss": 17.6898, "step": 27123 }, { "epoch": 0.4958049244155227, "grad_norm": 7.257743893993381, "learning_rate": 5.310888956257135e-06, "loss": 17.7341, "step": 27124 }, { "epoch": 0.49582320361196924, "grad_norm": 5.938111724825104, "learning_rate": 5.310593516930418e-06, "loss": 17.3035, "step": 27125 }, { "epoch": 0.4958414828084157, "grad_norm": 6.100282441446354, "learning_rate": 5.310298076515096e-06, "loss": 17.5533, "step": 27126 }, { "epoch": 0.49585976200486226, "grad_norm": 6.40958138518579, "learning_rate": 5.310002635012207e-06, "loss": 17.5581, "step": 27127 }, { "epoch": 0.4958780412013088, "grad_norm": 6.904282606317481, "learning_rate": 5.309707192422786e-06, "loss": 17.818, "step": 27128 }, { "epoch": 0.49589632039775533, "grad_norm": 8.793520044325492, "learning_rate": 5.309411748747869e-06, "loss": 17.8181, "step": 27129 }, { "epoch": 0.49591459959420187, "grad_norm": 6.398017389891622, "learning_rate": 5.309116303988488e-06, "loss": 17.5039, "step": 27130 }, { "epoch": 0.49593287879064835, "grad_norm": 7.251971439581628, "learning_rate": 5.308820858145682e-06, "loss": 17.5048, "step": 27131 }, { "epoch": 0.4959511579870949, "grad_norm": 6.404215997284142, "learning_rate": 5.308525411220488e-06, "loss": 17.4336, "step": 27132 }, { "epoch": 0.4959694371835414, "grad_norm": 7.729486686990656, "learning_rate": 5.3082299632139375e-06, "loss": 17.9189, "step": 27133 }, { "epoch": 0.49598771637998795, "grad_norm": 5.949653232636323, "learning_rate": 5.307934514127068e-06, "loss": 17.5042, "step": 27134 }, { "epoch": 0.49600599557643443, "grad_norm": 5.5871038029272935, "learning_rate": 5.3076390639609146e-06, "loss": 17.4189, "step": 27135 }, { "epoch": 0.49602427477288097, "grad_norm": 6.966548112460395, "learning_rate": 5.307343612716512e-06, "loss": 17.5541, "step": 27136 }, { "epoch": 0.4960425539693275, "grad_norm": 7.007021036075989, "learning_rate": 5.307048160394899e-06, "loss": 17.6978, "step": 27137 }, { "epoch": 0.49606083316577404, "grad_norm": 8.397875547905214, "learning_rate": 5.306752706997107e-06, "loss": 18.4, "step": 27138 }, { "epoch": 0.4960791123622206, "grad_norm": 6.045377634488545, "learning_rate": 5.306457252524176e-06, "loss": 17.4589, "step": 27139 }, { "epoch": 0.49609739155866706, "grad_norm": 6.331855818214131, "learning_rate": 5.306161796977134e-06, "loss": 17.2416, "step": 27140 }, { "epoch": 0.4961156707551136, "grad_norm": 7.340035417544372, "learning_rate": 5.305866340357024e-06, "loss": 17.6641, "step": 27141 }, { "epoch": 0.4961339499515601, "grad_norm": 6.171788505845319, "learning_rate": 5.30557088266488e-06, "loss": 17.1048, "step": 27142 }, { "epoch": 0.49615222914800666, "grad_norm": 5.457816041538035, "learning_rate": 5.305275423901737e-06, "loss": 17.3584, "step": 27143 }, { "epoch": 0.4961705083444532, "grad_norm": 7.613004108036592, "learning_rate": 5.304979964068628e-06, "loss": 18.1483, "step": 27144 }, { "epoch": 0.4961887875408997, "grad_norm": 5.481668594229304, "learning_rate": 5.3046845031665915e-06, "loss": 17.0934, "step": 27145 }, { "epoch": 0.4962070667373462, "grad_norm": 7.39994442611417, "learning_rate": 5.304389041196664e-06, "loss": 17.4173, "step": 27146 }, { "epoch": 0.49622534593379275, "grad_norm": 5.7025844735581455, "learning_rate": 5.304093578159877e-06, "loss": 17.2323, "step": 27147 }, { "epoch": 0.4962436251302393, "grad_norm": 7.30143375176874, "learning_rate": 5.303798114057269e-06, "loss": 18.0775, "step": 27148 }, { "epoch": 0.4962619043266858, "grad_norm": 5.696227811299683, "learning_rate": 5.3035026488898754e-06, "loss": 17.4022, "step": 27149 }, { "epoch": 0.4962801835231323, "grad_norm": 6.530643656227143, "learning_rate": 5.30320718265873e-06, "loss": 17.6637, "step": 27150 }, { "epoch": 0.49629846271957884, "grad_norm": 6.822978267413106, "learning_rate": 5.302911715364871e-06, "loss": 17.8557, "step": 27151 }, { "epoch": 0.4963167419160254, "grad_norm": 7.268598916926087, "learning_rate": 5.3026162470093335e-06, "loss": 17.7663, "step": 27152 }, { "epoch": 0.4963350211124719, "grad_norm": 7.921854843630647, "learning_rate": 5.30232077759315e-06, "loss": 18.2357, "step": 27153 }, { "epoch": 0.49635330030891844, "grad_norm": 6.040172703825464, "learning_rate": 5.302025307117361e-06, "loss": 17.5834, "step": 27154 }, { "epoch": 0.4963715795053649, "grad_norm": 6.33443303987812, "learning_rate": 5.301729835582998e-06, "loss": 17.5087, "step": 27155 }, { "epoch": 0.49638985870181146, "grad_norm": 6.399351837295014, "learning_rate": 5.301434362991099e-06, "loss": 17.5478, "step": 27156 }, { "epoch": 0.496408137898258, "grad_norm": 7.275944016496439, "learning_rate": 5.301138889342698e-06, "loss": 18.0614, "step": 27157 }, { "epoch": 0.49642641709470453, "grad_norm": 7.654295357271992, "learning_rate": 5.300843414638831e-06, "loss": 17.9039, "step": 27158 }, { "epoch": 0.49644469629115107, "grad_norm": 6.381527232148551, "learning_rate": 5.3005479388805335e-06, "loss": 17.747, "step": 27159 }, { "epoch": 0.49646297548759755, "grad_norm": 7.172154294659397, "learning_rate": 5.300252462068845e-06, "loss": 17.5675, "step": 27160 }, { "epoch": 0.4964812546840441, "grad_norm": 5.899237087590126, "learning_rate": 5.299956984204794e-06, "loss": 17.3641, "step": 27161 }, { "epoch": 0.4964995338804906, "grad_norm": 6.293315909695069, "learning_rate": 5.299661505289421e-06, "loss": 17.1642, "step": 27162 }, { "epoch": 0.49651781307693715, "grad_norm": 7.239253128344299, "learning_rate": 5.29936602532376e-06, "loss": 17.3627, "step": 27163 }, { "epoch": 0.4965360922733837, "grad_norm": 7.336105727193535, "learning_rate": 5.299070544308847e-06, "loss": 18.1346, "step": 27164 }, { "epoch": 0.49655437146983017, "grad_norm": 6.574231614732913, "learning_rate": 5.298775062245719e-06, "loss": 17.4154, "step": 27165 }, { "epoch": 0.4965726506662767, "grad_norm": 4.892042150079644, "learning_rate": 5.298479579135409e-06, "loss": 17.0709, "step": 27166 }, { "epoch": 0.49659092986272324, "grad_norm": 6.696837834447044, "learning_rate": 5.2981840949789546e-06, "loss": 17.3769, "step": 27167 }, { "epoch": 0.4966092090591698, "grad_norm": 7.3244109929235295, "learning_rate": 5.297888609777391e-06, "loss": 17.7439, "step": 27168 }, { "epoch": 0.49662748825561626, "grad_norm": 5.75513126188906, "learning_rate": 5.2975931235317525e-06, "loss": 17.3198, "step": 27169 }, { "epoch": 0.4966457674520628, "grad_norm": 6.675387476127053, "learning_rate": 5.297297636243077e-06, "loss": 17.27, "step": 27170 }, { "epoch": 0.4966640466485093, "grad_norm": 6.81580139447009, "learning_rate": 5.2970021479124e-06, "loss": 17.6338, "step": 27171 }, { "epoch": 0.49668232584495586, "grad_norm": 6.732773707828087, "learning_rate": 5.296706658540753e-06, "loss": 17.3781, "step": 27172 }, { "epoch": 0.4967006050414024, "grad_norm": 5.441919668441327, "learning_rate": 5.296411168129177e-06, "loss": 17.0593, "step": 27173 }, { "epoch": 0.4967188842378489, "grad_norm": 6.315453747522237, "learning_rate": 5.296115676678707e-06, "loss": 17.4357, "step": 27174 }, { "epoch": 0.4967371634342954, "grad_norm": 7.056538431335537, "learning_rate": 5.2958201841903754e-06, "loss": 17.6652, "step": 27175 }, { "epoch": 0.49675544263074195, "grad_norm": 5.821744196774932, "learning_rate": 5.295524690665221e-06, "loss": 17.2166, "step": 27176 }, { "epoch": 0.4967737218271885, "grad_norm": 7.502599944919671, "learning_rate": 5.295229196104277e-06, "loss": 18.0384, "step": 27177 }, { "epoch": 0.496792001023635, "grad_norm": 5.019815368437581, "learning_rate": 5.2949337005085795e-06, "loss": 17.0715, "step": 27178 }, { "epoch": 0.4968102802200815, "grad_norm": 6.0498780514001815, "learning_rate": 5.294638203879167e-06, "loss": 17.3159, "step": 27179 }, { "epoch": 0.49682855941652804, "grad_norm": 6.728427255070438, "learning_rate": 5.294342706217072e-06, "loss": 18.0954, "step": 27180 }, { "epoch": 0.4968468386129746, "grad_norm": 6.383029542631247, "learning_rate": 5.294047207523332e-06, "loss": 17.4091, "step": 27181 }, { "epoch": 0.4968651178094211, "grad_norm": 5.8332534112216745, "learning_rate": 5.293751707798981e-06, "loss": 17.1511, "step": 27182 }, { "epoch": 0.49688339700586764, "grad_norm": 5.88775996750707, "learning_rate": 5.293456207045056e-06, "loss": 17.2777, "step": 27183 }, { "epoch": 0.4969016762023141, "grad_norm": 7.75262316026169, "learning_rate": 5.293160705262594e-06, "loss": 17.8964, "step": 27184 }, { "epoch": 0.49691995539876066, "grad_norm": 6.520555094380181, "learning_rate": 5.292865202452628e-06, "loss": 17.6149, "step": 27185 }, { "epoch": 0.4969382345952072, "grad_norm": 6.358301365429968, "learning_rate": 5.2925696986161935e-06, "loss": 17.1833, "step": 27186 }, { "epoch": 0.49695651379165373, "grad_norm": 5.566572725779698, "learning_rate": 5.2922741937543294e-06, "loss": 17.2679, "step": 27187 }, { "epoch": 0.49697479298810027, "grad_norm": 6.276243680080803, "learning_rate": 5.2919786878680705e-06, "loss": 17.5834, "step": 27188 }, { "epoch": 0.49699307218454675, "grad_norm": 5.914734083121124, "learning_rate": 5.29168318095845e-06, "loss": 17.2533, "step": 27189 }, { "epoch": 0.4970113513809933, "grad_norm": 7.3116436347415945, "learning_rate": 5.291387673026505e-06, "loss": 17.3719, "step": 27190 }, { "epoch": 0.4970296305774398, "grad_norm": 6.664224589792451, "learning_rate": 5.291092164073273e-06, "loss": 17.7188, "step": 27191 }, { "epoch": 0.49704790977388635, "grad_norm": 7.468512860219787, "learning_rate": 5.290796654099787e-06, "loss": 17.5344, "step": 27192 }, { "epoch": 0.4970661889703329, "grad_norm": 5.7942198892057695, "learning_rate": 5.2905011431070845e-06, "loss": 17.0086, "step": 27193 }, { "epoch": 0.49708446816677937, "grad_norm": 7.135569698803995, "learning_rate": 5.2902056310962005e-06, "loss": 17.7024, "step": 27194 }, { "epoch": 0.4971027473632259, "grad_norm": 8.383877195301391, "learning_rate": 5.28991011806817e-06, "loss": 17.6119, "step": 27195 }, { "epoch": 0.49712102655967244, "grad_norm": 6.165860670145469, "learning_rate": 5.2896146040240305e-06, "loss": 17.3282, "step": 27196 }, { "epoch": 0.497139305756119, "grad_norm": 7.3823915368426185, "learning_rate": 5.289319088964817e-06, "loss": 17.7051, "step": 27197 }, { "epoch": 0.4971575849525655, "grad_norm": 5.654115886268558, "learning_rate": 5.289023572891567e-06, "loss": 17.1707, "step": 27198 }, { "epoch": 0.497175864149012, "grad_norm": 7.867968063707022, "learning_rate": 5.288728055805311e-06, "loss": 18.0214, "step": 27199 }, { "epoch": 0.49719414334545853, "grad_norm": 7.252312509773188, "learning_rate": 5.28843253770709e-06, "loss": 17.5912, "step": 27200 }, { "epoch": 0.49721242254190506, "grad_norm": 6.8954169769120055, "learning_rate": 5.288137018597939e-06, "loss": 17.8047, "step": 27201 }, { "epoch": 0.4972307017383516, "grad_norm": 7.892652262769014, "learning_rate": 5.287841498478892e-06, "loss": 17.9997, "step": 27202 }, { "epoch": 0.4972489809347981, "grad_norm": 5.391683692320803, "learning_rate": 5.287545977350985e-06, "loss": 17.2399, "step": 27203 }, { "epoch": 0.4972672601312446, "grad_norm": 7.162900747616963, "learning_rate": 5.287250455215254e-06, "loss": 17.7604, "step": 27204 }, { "epoch": 0.49728553932769115, "grad_norm": 6.745080042105251, "learning_rate": 5.2869549320727355e-06, "loss": 17.527, "step": 27205 }, { "epoch": 0.4973038185241377, "grad_norm": 7.4739556135528, "learning_rate": 5.286659407924465e-06, "loss": 17.9076, "step": 27206 }, { "epoch": 0.4973220977205842, "grad_norm": 6.454025515691937, "learning_rate": 5.286363882771478e-06, "loss": 17.8487, "step": 27207 }, { "epoch": 0.4973403769170307, "grad_norm": 5.179353665563951, "learning_rate": 5.2860683566148105e-06, "loss": 17.1066, "step": 27208 }, { "epoch": 0.49735865611347724, "grad_norm": 5.0208671925141815, "learning_rate": 5.285772829455499e-06, "loss": 17.0062, "step": 27209 }, { "epoch": 0.4973769353099238, "grad_norm": 6.331156785180926, "learning_rate": 5.285477301294577e-06, "loss": 17.5236, "step": 27210 }, { "epoch": 0.4973952145063703, "grad_norm": 6.954132012161971, "learning_rate": 5.2851817721330835e-06, "loss": 17.7691, "step": 27211 }, { "epoch": 0.49741349370281684, "grad_norm": 7.436982095131251, "learning_rate": 5.284886241972051e-06, "loss": 17.9956, "step": 27212 }, { "epoch": 0.4974317728992633, "grad_norm": 6.021238119244414, "learning_rate": 5.284590710812519e-06, "loss": 17.1315, "step": 27213 }, { "epoch": 0.49745005209570986, "grad_norm": 5.331703064694421, "learning_rate": 5.284295178655518e-06, "loss": 16.984, "step": 27214 }, { "epoch": 0.4974683312921564, "grad_norm": 6.639442860072865, "learning_rate": 5.283999645502091e-06, "loss": 17.5802, "step": 27215 }, { "epoch": 0.49748661048860293, "grad_norm": 6.296819913586566, "learning_rate": 5.283704111353267e-06, "loss": 17.7537, "step": 27216 }, { "epoch": 0.49750488968504947, "grad_norm": 5.949604943756755, "learning_rate": 5.283408576210085e-06, "loss": 17.375, "step": 27217 }, { "epoch": 0.49752316888149595, "grad_norm": 5.874507368310843, "learning_rate": 5.283113040073581e-06, "loss": 17.2806, "step": 27218 }, { "epoch": 0.4975414480779425, "grad_norm": 5.321096736482824, "learning_rate": 5.282817502944791e-06, "loss": 16.9946, "step": 27219 }, { "epoch": 0.497559727274389, "grad_norm": 4.867627458878209, "learning_rate": 5.28252196482475e-06, "loss": 16.8596, "step": 27220 }, { "epoch": 0.49757800647083555, "grad_norm": 5.657588405964617, "learning_rate": 5.282226425714494e-06, "loss": 17.1736, "step": 27221 }, { "epoch": 0.4975962856672821, "grad_norm": 7.688874444755954, "learning_rate": 5.281930885615059e-06, "loss": 17.4771, "step": 27222 }, { "epoch": 0.49761456486372857, "grad_norm": 7.27819314632887, "learning_rate": 5.28163534452748e-06, "loss": 18.0137, "step": 27223 }, { "epoch": 0.4976328440601751, "grad_norm": 7.227627704917986, "learning_rate": 5.281339802452794e-06, "loss": 17.845, "step": 27224 }, { "epoch": 0.49765112325662164, "grad_norm": 6.586557711148001, "learning_rate": 5.281044259392038e-06, "loss": 17.5847, "step": 27225 }, { "epoch": 0.4976694024530682, "grad_norm": 6.729547602989797, "learning_rate": 5.280748715346242e-06, "loss": 17.7012, "step": 27226 }, { "epoch": 0.4976876816495147, "grad_norm": 6.039794973030868, "learning_rate": 5.28045317031645e-06, "loss": 17.4433, "step": 27227 }, { "epoch": 0.4977059608459612, "grad_norm": 5.757563907346949, "learning_rate": 5.280157624303692e-06, "loss": 17.0479, "step": 27228 }, { "epoch": 0.49772424004240773, "grad_norm": 7.145382119795324, "learning_rate": 5.279862077309007e-06, "loss": 17.6854, "step": 27229 }, { "epoch": 0.49774251923885426, "grad_norm": 6.90161787296418, "learning_rate": 5.27956652933343e-06, "loss": 17.8546, "step": 27230 }, { "epoch": 0.4977607984353008, "grad_norm": 5.636887504816268, "learning_rate": 5.279270980377994e-06, "loss": 17.2021, "step": 27231 }, { "epoch": 0.49777907763174734, "grad_norm": 7.027245822017659, "learning_rate": 5.27897543044374e-06, "loss": 17.7732, "step": 27232 }, { "epoch": 0.4977973568281938, "grad_norm": 6.361708975918521, "learning_rate": 5.278679879531701e-06, "loss": 17.2399, "step": 27233 }, { "epoch": 0.49781563602464035, "grad_norm": 6.389524564784658, "learning_rate": 5.278384327642912e-06, "loss": 17.7114, "step": 27234 }, { "epoch": 0.4978339152210869, "grad_norm": 6.159050623032668, "learning_rate": 5.278088774778412e-06, "loss": 17.4076, "step": 27235 }, { "epoch": 0.4978521944175334, "grad_norm": 7.697747232418964, "learning_rate": 5.277793220939233e-06, "loss": 18.5743, "step": 27236 }, { "epoch": 0.4978704736139799, "grad_norm": 6.099365168059452, "learning_rate": 5.277497666126413e-06, "loss": 17.5432, "step": 27237 }, { "epoch": 0.49788875281042644, "grad_norm": 7.462158905698103, "learning_rate": 5.277202110340989e-06, "loss": 18.1144, "step": 27238 }, { "epoch": 0.497907032006873, "grad_norm": 5.544124810711395, "learning_rate": 5.276906553583996e-06, "loss": 17.2428, "step": 27239 }, { "epoch": 0.4979253112033195, "grad_norm": 7.329429172628634, "learning_rate": 5.276610995856468e-06, "loss": 17.5655, "step": 27240 }, { "epoch": 0.49794359039976605, "grad_norm": 5.533087384747263, "learning_rate": 5.276315437159443e-06, "loss": 17.2499, "step": 27241 }, { "epoch": 0.4979618695962125, "grad_norm": 6.646939876218621, "learning_rate": 5.2760198774939565e-06, "loss": 17.5805, "step": 27242 }, { "epoch": 0.49798014879265906, "grad_norm": 7.168023518725268, "learning_rate": 5.275724316861045e-06, "loss": 18.0732, "step": 27243 }, { "epoch": 0.4979984279891056, "grad_norm": 7.253311223750868, "learning_rate": 5.275428755261742e-06, "loss": 17.8587, "step": 27244 }, { "epoch": 0.49801670718555213, "grad_norm": 6.635024711834854, "learning_rate": 5.275133192697086e-06, "loss": 17.6585, "step": 27245 }, { "epoch": 0.49803498638199867, "grad_norm": 5.967339873264225, "learning_rate": 5.274837629168112e-06, "loss": 17.2748, "step": 27246 }, { "epoch": 0.49805326557844515, "grad_norm": 5.594154402885207, "learning_rate": 5.274542064675857e-06, "loss": 17.3248, "step": 27247 }, { "epoch": 0.4980715447748917, "grad_norm": 6.088278735186422, "learning_rate": 5.274246499221355e-06, "loss": 17.6099, "step": 27248 }, { "epoch": 0.4980898239713382, "grad_norm": 5.448986816313254, "learning_rate": 5.273950932805641e-06, "loss": 17.2054, "step": 27249 }, { "epoch": 0.49810810316778475, "grad_norm": 6.941485267383951, "learning_rate": 5.273655365429756e-06, "loss": 17.5758, "step": 27250 }, { "epoch": 0.4981263823642313, "grad_norm": 7.093893261235679, "learning_rate": 5.273359797094731e-06, "loss": 17.6851, "step": 27251 }, { "epoch": 0.49814466156067777, "grad_norm": 6.0737254445571365, "learning_rate": 5.273064227801604e-06, "loss": 17.2736, "step": 27252 }, { "epoch": 0.4981629407571243, "grad_norm": 6.223150827510893, "learning_rate": 5.272768657551411e-06, "loss": 17.375, "step": 27253 }, { "epoch": 0.49818121995357084, "grad_norm": 6.778711264125992, "learning_rate": 5.272473086345187e-06, "loss": 17.4212, "step": 27254 }, { "epoch": 0.4981994991500174, "grad_norm": 5.4685402624857025, "learning_rate": 5.272177514183967e-06, "loss": 17.1399, "step": 27255 }, { "epoch": 0.4982177783464639, "grad_norm": 5.971388206290414, "learning_rate": 5.271881941068792e-06, "loss": 17.3266, "step": 27256 }, { "epoch": 0.4982360575429104, "grad_norm": 5.697162623851452, "learning_rate": 5.271586367000692e-06, "loss": 17.3548, "step": 27257 }, { "epoch": 0.49825433673935693, "grad_norm": 6.088845435619034, "learning_rate": 5.271290791980704e-06, "loss": 17.3779, "step": 27258 }, { "epoch": 0.49827261593580346, "grad_norm": 5.853067754879511, "learning_rate": 5.270995216009867e-06, "loss": 17.323, "step": 27259 }, { "epoch": 0.49829089513225, "grad_norm": 6.57190388598174, "learning_rate": 5.2706996390892166e-06, "loss": 17.6874, "step": 27260 }, { "epoch": 0.49830917432869654, "grad_norm": 5.603114077511451, "learning_rate": 5.270404061219786e-06, "loss": 17.0561, "step": 27261 }, { "epoch": 0.498327453525143, "grad_norm": 6.171017351257926, "learning_rate": 5.270108482402612e-06, "loss": 17.3007, "step": 27262 }, { "epoch": 0.49834573272158955, "grad_norm": 6.350938266900552, "learning_rate": 5.269812902638733e-06, "loss": 17.8039, "step": 27263 }, { "epoch": 0.4983640119180361, "grad_norm": 5.562161501121355, "learning_rate": 5.2695173219291805e-06, "loss": 17.1186, "step": 27264 }, { "epoch": 0.4983822911144826, "grad_norm": 5.4989385623513565, "learning_rate": 5.269221740274996e-06, "loss": 17.3501, "step": 27265 }, { "epoch": 0.49840057031092916, "grad_norm": 6.814186356177413, "learning_rate": 5.268926157677211e-06, "loss": 17.681, "step": 27266 }, { "epoch": 0.49841884950737564, "grad_norm": 6.32709050709167, "learning_rate": 5.268630574136864e-06, "loss": 17.4067, "step": 27267 }, { "epoch": 0.4984371287038222, "grad_norm": 6.0456350719203, "learning_rate": 5.268334989654988e-06, "loss": 17.6149, "step": 27268 }, { "epoch": 0.4984554079002687, "grad_norm": 5.299140660827133, "learning_rate": 5.268039404232624e-06, "loss": 17.1577, "step": 27269 }, { "epoch": 0.49847368709671525, "grad_norm": 5.4418613551047965, "learning_rate": 5.267743817870805e-06, "loss": 17.1168, "step": 27270 }, { "epoch": 0.4984919662931617, "grad_norm": 7.413473483414581, "learning_rate": 5.267448230570565e-06, "loss": 17.9511, "step": 27271 }, { "epoch": 0.49851024548960826, "grad_norm": 6.447941037758599, "learning_rate": 5.267152642332943e-06, "loss": 17.6765, "step": 27272 }, { "epoch": 0.4985285246860548, "grad_norm": 8.662117696687469, "learning_rate": 5.266857053158975e-06, "loss": 18.0849, "step": 27273 }, { "epoch": 0.49854680388250133, "grad_norm": 4.784470459934603, "learning_rate": 5.2665614630496965e-06, "loss": 16.8672, "step": 27274 }, { "epoch": 0.49856508307894787, "grad_norm": 7.212260733700813, "learning_rate": 5.2662658720061424e-06, "loss": 17.6389, "step": 27275 }, { "epoch": 0.49858336227539435, "grad_norm": 6.082691345885144, "learning_rate": 5.265970280029349e-06, "loss": 17.5206, "step": 27276 }, { "epoch": 0.4986016414718409, "grad_norm": 6.361255605534515, "learning_rate": 5.265674687120354e-06, "loss": 17.3961, "step": 27277 }, { "epoch": 0.4986199206682874, "grad_norm": 7.882584150565434, "learning_rate": 5.265379093280191e-06, "loss": 18.135, "step": 27278 }, { "epoch": 0.49863819986473396, "grad_norm": 6.803867565955744, "learning_rate": 5.265083498509898e-06, "loss": 17.8072, "step": 27279 }, { "epoch": 0.4986564790611805, "grad_norm": 6.734315518552827, "learning_rate": 5.26478790281051e-06, "loss": 17.3493, "step": 27280 }, { "epoch": 0.49867475825762697, "grad_norm": 5.94951606986175, "learning_rate": 5.264492306183063e-06, "loss": 17.4249, "step": 27281 }, { "epoch": 0.4986930374540735, "grad_norm": 7.496664659887621, "learning_rate": 5.264196708628595e-06, "loss": 17.6521, "step": 27282 }, { "epoch": 0.49871131665052004, "grad_norm": 5.084969949782511, "learning_rate": 5.26390111014814e-06, "loss": 16.9039, "step": 27283 }, { "epoch": 0.4987295958469666, "grad_norm": 7.350533788688811, "learning_rate": 5.263605510742734e-06, "loss": 17.7732, "step": 27284 }, { "epoch": 0.4987478750434131, "grad_norm": 5.92753706762238, "learning_rate": 5.263309910413412e-06, "loss": 17.1627, "step": 27285 }, { "epoch": 0.4987661542398596, "grad_norm": 8.021800227382037, "learning_rate": 5.263014309161214e-06, "loss": 18.1532, "step": 27286 }, { "epoch": 0.49878443343630613, "grad_norm": 6.557198343835686, "learning_rate": 5.262718706987172e-06, "loss": 17.678, "step": 27287 }, { "epoch": 0.49880271263275267, "grad_norm": 6.704476399119188, "learning_rate": 5.262423103892327e-06, "loss": 17.7313, "step": 27288 }, { "epoch": 0.4988209918291992, "grad_norm": 8.38404822779253, "learning_rate": 5.262127499877708e-06, "loss": 18.4438, "step": 27289 }, { "epoch": 0.49883927102564574, "grad_norm": 6.1932640372868715, "learning_rate": 5.261831894944356e-06, "loss": 17.103, "step": 27290 }, { "epoch": 0.4988575502220922, "grad_norm": 6.626956857118013, "learning_rate": 5.261536289093308e-06, "loss": 17.4115, "step": 27291 }, { "epoch": 0.49887582941853875, "grad_norm": 5.9304365371352965, "learning_rate": 5.261240682325595e-06, "loss": 17.4598, "step": 27292 }, { "epoch": 0.4988941086149853, "grad_norm": 7.531357573936381, "learning_rate": 5.260945074642257e-06, "loss": 18.1339, "step": 27293 }, { "epoch": 0.4989123878114318, "grad_norm": 6.873920083080513, "learning_rate": 5.26064946604433e-06, "loss": 17.749, "step": 27294 }, { "epoch": 0.49893066700787836, "grad_norm": 5.743669766851879, "learning_rate": 5.260353856532848e-06, "loss": 17.2678, "step": 27295 }, { "epoch": 0.49894894620432484, "grad_norm": 6.520042487393192, "learning_rate": 5.260058246108849e-06, "loss": 17.7258, "step": 27296 }, { "epoch": 0.4989672254007714, "grad_norm": 6.582487933064542, "learning_rate": 5.259762634773369e-06, "loss": 17.6029, "step": 27297 }, { "epoch": 0.4989855045972179, "grad_norm": 6.714649967128315, "learning_rate": 5.259467022527443e-06, "loss": 17.6026, "step": 27298 }, { "epoch": 0.49900378379366445, "grad_norm": 6.081654655488484, "learning_rate": 5.259171409372107e-06, "loss": 17.3182, "step": 27299 }, { "epoch": 0.499022062990111, "grad_norm": 5.695502752793096, "learning_rate": 5.258875795308398e-06, "loss": 17.404, "step": 27300 }, { "epoch": 0.49904034218655746, "grad_norm": 5.562535014051476, "learning_rate": 5.258580180337353e-06, "loss": 17.538, "step": 27301 }, { "epoch": 0.499058621383004, "grad_norm": 5.945981359883965, "learning_rate": 5.258284564460006e-06, "loss": 17.2567, "step": 27302 }, { "epoch": 0.49907690057945053, "grad_norm": 8.16154827655044, "learning_rate": 5.2579889476773936e-06, "loss": 18.235, "step": 27303 }, { "epoch": 0.49909517977589707, "grad_norm": 7.341600633851559, "learning_rate": 5.257693329990552e-06, "loss": 18.1669, "step": 27304 }, { "epoch": 0.49911345897234355, "grad_norm": 5.323704261626477, "learning_rate": 5.257397711400519e-06, "loss": 17.2324, "step": 27305 }, { "epoch": 0.4991317381687901, "grad_norm": 5.860128598907489, "learning_rate": 5.2571020919083294e-06, "loss": 17.3605, "step": 27306 }, { "epoch": 0.4991500173652366, "grad_norm": 4.738935459635471, "learning_rate": 5.256806471515018e-06, "loss": 16.9251, "step": 27307 }, { "epoch": 0.49916829656168316, "grad_norm": 6.472101664606162, "learning_rate": 5.2565108502216225e-06, "loss": 17.2316, "step": 27308 }, { "epoch": 0.4991865757581297, "grad_norm": 7.973558479732446, "learning_rate": 5.256215228029179e-06, "loss": 18.1083, "step": 27309 }, { "epoch": 0.49920485495457617, "grad_norm": 4.894069680357392, "learning_rate": 5.255919604938723e-06, "loss": 16.889, "step": 27310 }, { "epoch": 0.4992231341510227, "grad_norm": 5.8727078370170185, "learning_rate": 5.255623980951292e-06, "loss": 17.3891, "step": 27311 }, { "epoch": 0.49924141334746924, "grad_norm": 6.632408257021762, "learning_rate": 5.2553283560679205e-06, "loss": 17.6655, "step": 27312 }, { "epoch": 0.4992596925439158, "grad_norm": 6.405787045312934, "learning_rate": 5.255032730289644e-06, "loss": 17.5536, "step": 27313 }, { "epoch": 0.4992779717403623, "grad_norm": 7.02220431101913, "learning_rate": 5.254737103617502e-06, "loss": 17.9237, "step": 27314 }, { "epoch": 0.4992962509368088, "grad_norm": 6.160457284452359, "learning_rate": 5.254441476052529e-06, "loss": 17.2988, "step": 27315 }, { "epoch": 0.49931453013325533, "grad_norm": 6.7947684550883745, "learning_rate": 5.254145847595758e-06, "loss": 17.7638, "step": 27316 }, { "epoch": 0.49933280932970187, "grad_norm": 6.777068010215756, "learning_rate": 5.253850218248228e-06, "loss": 17.8532, "step": 27317 }, { "epoch": 0.4993510885261484, "grad_norm": 7.243763457221405, "learning_rate": 5.2535545880109775e-06, "loss": 17.7889, "step": 27318 }, { "epoch": 0.49936936772259494, "grad_norm": 5.3918795999495, "learning_rate": 5.25325895688504e-06, "loss": 17.0649, "step": 27319 }, { "epoch": 0.4993876469190414, "grad_norm": 5.413404390198723, "learning_rate": 5.25296332487145e-06, "loss": 17.1639, "step": 27320 }, { "epoch": 0.49940592611548795, "grad_norm": 5.72906473494947, "learning_rate": 5.252667691971247e-06, "loss": 17.2953, "step": 27321 }, { "epoch": 0.4994242053119345, "grad_norm": 5.193161058333442, "learning_rate": 5.252372058185465e-06, "loss": 17.0099, "step": 27322 }, { "epoch": 0.499442484508381, "grad_norm": 7.039314401305383, "learning_rate": 5.25207642351514e-06, "loss": 17.5882, "step": 27323 }, { "epoch": 0.49946076370482756, "grad_norm": 8.025612850124404, "learning_rate": 5.2517807879613105e-06, "loss": 17.9741, "step": 27324 }, { "epoch": 0.49947904290127404, "grad_norm": 5.330340399919986, "learning_rate": 5.251485151525011e-06, "loss": 16.956, "step": 27325 }, { "epoch": 0.4994973220977206, "grad_norm": 7.545316536692668, "learning_rate": 5.251189514207276e-06, "loss": 18.2837, "step": 27326 }, { "epoch": 0.4995156012941671, "grad_norm": 6.0748830026656515, "learning_rate": 5.250893876009146e-06, "loss": 17.2829, "step": 27327 }, { "epoch": 0.49953388049061365, "grad_norm": 5.65142999078361, "learning_rate": 5.2505982369316525e-06, "loss": 17.2648, "step": 27328 }, { "epoch": 0.4995521596870602, "grad_norm": 6.562824523907876, "learning_rate": 5.250302596975836e-06, "loss": 17.387, "step": 27329 }, { "epoch": 0.49957043888350666, "grad_norm": 6.622548580334553, "learning_rate": 5.25000695614273e-06, "loss": 17.3598, "step": 27330 }, { "epoch": 0.4995887180799532, "grad_norm": 7.550808300661673, "learning_rate": 5.24971131443337e-06, "loss": 18.1236, "step": 27331 }, { "epoch": 0.49960699727639973, "grad_norm": 5.670013093144835, "learning_rate": 5.2494156718487955e-06, "loss": 17.1048, "step": 27332 }, { "epoch": 0.49962527647284627, "grad_norm": 4.981376117498401, "learning_rate": 5.249120028390039e-06, "loss": 17.1096, "step": 27333 }, { "epoch": 0.4996435556692928, "grad_norm": 7.2170066049910115, "learning_rate": 5.2488243840581395e-06, "loss": 17.6629, "step": 27334 }, { "epoch": 0.4996618348657393, "grad_norm": 6.390773456586503, "learning_rate": 5.248528738854132e-06, "loss": 17.352, "step": 27335 }, { "epoch": 0.4996801140621858, "grad_norm": 5.998306482182172, "learning_rate": 5.248233092779053e-06, "loss": 17.4651, "step": 27336 }, { "epoch": 0.49969839325863236, "grad_norm": 5.664445245901735, "learning_rate": 5.247937445833937e-06, "loss": 17.337, "step": 27337 }, { "epoch": 0.4997166724550789, "grad_norm": 7.0876287427138704, "learning_rate": 5.247641798019824e-06, "loss": 17.7832, "step": 27338 }, { "epoch": 0.49973495165152537, "grad_norm": 7.7887961983554534, "learning_rate": 5.247346149337746e-06, "loss": 17.8716, "step": 27339 }, { "epoch": 0.4997532308479719, "grad_norm": 6.913824805197927, "learning_rate": 5.247050499788742e-06, "loss": 17.7465, "step": 27340 }, { "epoch": 0.49977151004441844, "grad_norm": 6.70493938546642, "learning_rate": 5.246754849373848e-06, "loss": 17.6515, "step": 27341 }, { "epoch": 0.499789789240865, "grad_norm": 7.240895239359535, "learning_rate": 5.246459198094098e-06, "loss": 17.7322, "step": 27342 }, { "epoch": 0.4998080684373115, "grad_norm": 7.6092188518292, "learning_rate": 5.246163545950532e-06, "loss": 17.7282, "step": 27343 }, { "epoch": 0.499826347633758, "grad_norm": 8.32095780022321, "learning_rate": 5.245867892944183e-06, "loss": 18.1291, "step": 27344 }, { "epoch": 0.49984462683020453, "grad_norm": 5.387775197233404, "learning_rate": 5.245572239076089e-06, "loss": 17.1777, "step": 27345 }, { "epoch": 0.49986290602665107, "grad_norm": 5.71156203244884, "learning_rate": 5.245276584347285e-06, "loss": 17.2225, "step": 27346 }, { "epoch": 0.4998811852230976, "grad_norm": 5.826846297576714, "learning_rate": 5.2449809287588086e-06, "loss": 16.9412, "step": 27347 }, { "epoch": 0.49989946441954414, "grad_norm": 5.894183395356666, "learning_rate": 5.2446852723116945e-06, "loss": 17.2698, "step": 27348 }, { "epoch": 0.4999177436159906, "grad_norm": 7.939433203270985, "learning_rate": 5.24438961500698e-06, "loss": 17.8702, "step": 27349 }, { "epoch": 0.49993602281243715, "grad_norm": 7.17360554657337, "learning_rate": 5.244093956845701e-06, "loss": 17.8907, "step": 27350 }, { "epoch": 0.4999543020088837, "grad_norm": 7.083243086783728, "learning_rate": 5.2437982978288935e-06, "loss": 17.8503, "step": 27351 }, { "epoch": 0.4999725812053302, "grad_norm": 6.267525803147073, "learning_rate": 5.2435026379575945e-06, "loss": 17.5359, "step": 27352 }, { "epoch": 0.49999086040177676, "grad_norm": 5.68135934114697, "learning_rate": 5.243206977232841e-06, "loss": 17.2727, "step": 27353 }, { "epoch": 0.5000091395982232, "grad_norm": 6.0711168002188565, "learning_rate": 5.242911315655667e-06, "loss": 17.3524, "step": 27354 }, { "epoch": 0.5000274187946698, "grad_norm": 8.019120929226196, "learning_rate": 5.24261565322711e-06, "loss": 18.4906, "step": 27355 }, { "epoch": 0.5000456979911163, "grad_norm": 6.1078030558693905, "learning_rate": 5.242319989948207e-06, "loss": 17.3165, "step": 27356 }, { "epoch": 0.5000639771875628, "grad_norm": 6.110272340357689, "learning_rate": 5.242024325819993e-06, "loss": 17.1111, "step": 27357 }, { "epoch": 0.5000822563840094, "grad_norm": 6.387111648612196, "learning_rate": 5.241728660843504e-06, "loss": 17.4546, "step": 27358 }, { "epoch": 0.5001005355804559, "grad_norm": 6.958135010754308, "learning_rate": 5.241432995019776e-06, "loss": 17.8441, "step": 27359 }, { "epoch": 0.5001188147769025, "grad_norm": 5.9604794133716466, "learning_rate": 5.241137328349849e-06, "loss": 17.4998, "step": 27360 }, { "epoch": 0.5001370939733489, "grad_norm": 7.191712253563262, "learning_rate": 5.240841660834756e-06, "loss": 17.6892, "step": 27361 }, { "epoch": 0.5001553731697954, "grad_norm": 8.089367139206132, "learning_rate": 5.240545992475533e-06, "loss": 17.9973, "step": 27362 }, { "epoch": 0.500173652366242, "grad_norm": 6.500798907545879, "learning_rate": 5.240250323273217e-06, "loss": 17.9538, "step": 27363 }, { "epoch": 0.5001919315626885, "grad_norm": 7.202789932386763, "learning_rate": 5.239954653228845e-06, "loss": 17.4273, "step": 27364 }, { "epoch": 0.5002102107591351, "grad_norm": 6.917364605155819, "learning_rate": 5.239658982343453e-06, "loss": 17.7761, "step": 27365 }, { "epoch": 0.5002284899555816, "grad_norm": 6.394389360957413, "learning_rate": 5.239363310618076e-06, "loss": 17.4169, "step": 27366 }, { "epoch": 0.500246769152028, "grad_norm": 5.216270305462959, "learning_rate": 5.239067638053752e-06, "loss": 16.9941, "step": 27367 }, { "epoch": 0.5002650483484746, "grad_norm": 5.8359567468384945, "learning_rate": 5.238771964651517e-06, "loss": 17.0607, "step": 27368 }, { "epoch": 0.5002833275449211, "grad_norm": 7.552960228153321, "learning_rate": 5.238476290412407e-06, "loss": 18.4582, "step": 27369 }, { "epoch": 0.5003016067413677, "grad_norm": 6.996073524475762, "learning_rate": 5.238180615337459e-06, "loss": 17.414, "step": 27370 }, { "epoch": 0.5003198859378142, "grad_norm": 5.943166499036538, "learning_rate": 5.237884939427707e-06, "loss": 17.4045, "step": 27371 }, { "epoch": 0.5003381651342607, "grad_norm": 6.822133846742642, "learning_rate": 5.237589262684188e-06, "loss": 17.3984, "step": 27372 }, { "epoch": 0.5003564443307073, "grad_norm": 4.917184385497025, "learning_rate": 5.237293585107942e-06, "loss": 16.8352, "step": 27373 }, { "epoch": 0.5003747235271537, "grad_norm": 5.67334271032968, "learning_rate": 5.236997906700002e-06, "loss": 17.2446, "step": 27374 }, { "epoch": 0.5003930027236003, "grad_norm": 5.8042370374908785, "learning_rate": 5.236702227461404e-06, "loss": 17.2292, "step": 27375 }, { "epoch": 0.5004112819200468, "grad_norm": 7.816240683140112, "learning_rate": 5.236406547393185e-06, "loss": 18.1633, "step": 27376 }, { "epoch": 0.5004295611164933, "grad_norm": 4.794110991390027, "learning_rate": 5.236110866496383e-06, "loss": 16.9117, "step": 27377 }, { "epoch": 0.5004478403129399, "grad_norm": 6.997077513138833, "learning_rate": 5.2358151847720315e-06, "loss": 17.6769, "step": 27378 }, { "epoch": 0.5004661195093864, "grad_norm": 7.3662656254532335, "learning_rate": 5.2355195022211695e-06, "loss": 17.7404, "step": 27379 }, { "epoch": 0.500484398705833, "grad_norm": 6.483880158315783, "learning_rate": 5.235223818844832e-06, "loss": 17.8944, "step": 27380 }, { "epoch": 0.5005026779022794, "grad_norm": 7.234928813027564, "learning_rate": 5.234928134644054e-06, "loss": 17.5484, "step": 27381 }, { "epoch": 0.5005209570987259, "grad_norm": 7.261600480159182, "learning_rate": 5.234632449619873e-06, "loss": 17.8835, "step": 27382 }, { "epoch": 0.5005392362951725, "grad_norm": 6.238666607560452, "learning_rate": 5.234336763773326e-06, "loss": 17.2874, "step": 27383 }, { "epoch": 0.500557515491619, "grad_norm": 6.396814720909468, "learning_rate": 5.234041077105451e-06, "loss": 17.2575, "step": 27384 }, { "epoch": 0.5005757946880656, "grad_norm": 6.363811787678032, "learning_rate": 5.233745389617281e-06, "loss": 17.7276, "step": 27385 }, { "epoch": 0.500594073884512, "grad_norm": 5.8440359777593365, "learning_rate": 5.233449701309853e-06, "loss": 17.3754, "step": 27386 }, { "epoch": 0.5006123530809585, "grad_norm": 6.120363833446666, "learning_rate": 5.233154012184205e-06, "loss": 17.3495, "step": 27387 }, { "epoch": 0.5006306322774051, "grad_norm": 7.57091183675726, "learning_rate": 5.232858322241373e-06, "loss": 17.618, "step": 27388 }, { "epoch": 0.5006489114738516, "grad_norm": 5.809761818613197, "learning_rate": 5.232562631482392e-06, "loss": 17.3301, "step": 27389 }, { "epoch": 0.5006671906702981, "grad_norm": 5.935096577307627, "learning_rate": 5.232266939908298e-06, "loss": 17.3575, "step": 27390 }, { "epoch": 0.5006854698667447, "grad_norm": 5.683414427115721, "learning_rate": 5.23197124752013e-06, "loss": 17.4249, "step": 27391 }, { "epoch": 0.5007037490631912, "grad_norm": 7.672901328723398, "learning_rate": 5.231675554318923e-06, "loss": 18.2358, "step": 27392 }, { "epoch": 0.5007220282596377, "grad_norm": 5.51855032814968, "learning_rate": 5.2313798603057135e-06, "loss": 17.2993, "step": 27393 }, { "epoch": 0.5007403074560842, "grad_norm": 4.902807267884883, "learning_rate": 5.2310841654815355e-06, "loss": 17.0361, "step": 27394 }, { "epoch": 0.5007585866525307, "grad_norm": 5.574659021420579, "learning_rate": 5.23078846984743e-06, "loss": 17.4636, "step": 27395 }, { "epoch": 0.5007768658489773, "grad_norm": 6.5940863560363825, "learning_rate": 5.2304927734044295e-06, "loss": 17.5887, "step": 27396 }, { "epoch": 0.5007951450454238, "grad_norm": 5.592720218805488, "learning_rate": 5.2301970761535725e-06, "loss": 17.3221, "step": 27397 }, { "epoch": 0.5008134242418704, "grad_norm": 5.166934862750008, "learning_rate": 5.229901378095895e-06, "loss": 17.1445, "step": 27398 }, { "epoch": 0.5008317034383168, "grad_norm": 7.109736810296239, "learning_rate": 5.229605679232432e-06, "loss": 17.6091, "step": 27399 }, { "epoch": 0.5008499826347633, "grad_norm": 6.148882070907928, "learning_rate": 5.229309979564221e-06, "loss": 17.6021, "step": 27400 }, { "epoch": 0.5008682618312099, "grad_norm": 7.179005321709902, "learning_rate": 5.2290142790923e-06, "loss": 17.991, "step": 27401 }, { "epoch": 0.5008865410276564, "grad_norm": 5.461564279100281, "learning_rate": 5.228718577817703e-06, "loss": 17.2314, "step": 27402 }, { "epoch": 0.500904820224103, "grad_norm": 6.55343778634408, "learning_rate": 5.228422875741467e-06, "loss": 17.3584, "step": 27403 }, { "epoch": 0.5009230994205495, "grad_norm": 5.462835394014223, "learning_rate": 5.228127172864627e-06, "loss": 17.1289, "step": 27404 }, { "epoch": 0.500941378616996, "grad_norm": 6.905041544785173, "learning_rate": 5.227831469188225e-06, "loss": 17.823, "step": 27405 }, { "epoch": 0.5009596578134425, "grad_norm": 7.102553762331364, "learning_rate": 5.227535764713291e-06, "loss": 17.7111, "step": 27406 }, { "epoch": 0.500977937009889, "grad_norm": 7.158427242021332, "learning_rate": 5.227240059440865e-06, "loss": 17.4464, "step": 27407 }, { "epoch": 0.5009962162063356, "grad_norm": 7.132729504524786, "learning_rate": 5.2269443533719814e-06, "loss": 17.7173, "step": 27408 }, { "epoch": 0.5010144954027821, "grad_norm": 7.428577864623479, "learning_rate": 5.226648646507677e-06, "loss": 18.198, "step": 27409 }, { "epoch": 0.5010327745992286, "grad_norm": 6.282317811470806, "learning_rate": 5.2263529388489885e-06, "loss": 17.3669, "step": 27410 }, { "epoch": 0.5010510537956752, "grad_norm": 6.973185316094404, "learning_rate": 5.226057230396953e-06, "loss": 17.2822, "step": 27411 }, { "epoch": 0.5010693329921216, "grad_norm": 5.760768121814484, "learning_rate": 5.225761521152608e-06, "loss": 17.1108, "step": 27412 }, { "epoch": 0.5010876121885682, "grad_norm": 6.267662872351457, "learning_rate": 5.225465811116988e-06, "loss": 17.8989, "step": 27413 }, { "epoch": 0.5011058913850147, "grad_norm": 6.161751745976709, "learning_rate": 5.225170100291129e-06, "loss": 17.6454, "step": 27414 }, { "epoch": 0.5011241705814612, "grad_norm": 6.946668774645795, "learning_rate": 5.22487438867607e-06, "loss": 17.4902, "step": 27415 }, { "epoch": 0.5011424497779078, "grad_norm": 7.8009518378293246, "learning_rate": 5.224578676272844e-06, "loss": 18.0471, "step": 27416 }, { "epoch": 0.5011607289743543, "grad_norm": 8.100631348561828, "learning_rate": 5.2242829630824885e-06, "loss": 18.1581, "step": 27417 }, { "epoch": 0.5011790081708009, "grad_norm": 7.944855927452899, "learning_rate": 5.223987249106042e-06, "loss": 18.2589, "step": 27418 }, { "epoch": 0.5011972873672473, "grad_norm": 5.1268255941988405, "learning_rate": 5.2236915343445404e-06, "loss": 17.0776, "step": 27419 }, { "epoch": 0.5012155665636938, "grad_norm": 7.138204920686909, "learning_rate": 5.223395818799019e-06, "loss": 18.2867, "step": 27420 }, { "epoch": 0.5012338457601404, "grad_norm": 7.551431483059387, "learning_rate": 5.223100102470513e-06, "loss": 17.6356, "step": 27421 }, { "epoch": 0.5012521249565869, "grad_norm": 6.532834147764727, "learning_rate": 5.222804385360062e-06, "loss": 17.6956, "step": 27422 }, { "epoch": 0.5012704041530335, "grad_norm": 7.167663155300319, "learning_rate": 5.222508667468701e-06, "loss": 17.2327, "step": 27423 }, { "epoch": 0.50128868334948, "grad_norm": 6.731071205187084, "learning_rate": 5.222212948797466e-06, "loss": 17.6541, "step": 27424 }, { "epoch": 0.5013069625459264, "grad_norm": 5.914110127603212, "learning_rate": 5.221917229347395e-06, "loss": 17.4401, "step": 27425 }, { "epoch": 0.501325241742373, "grad_norm": 6.690376471128805, "learning_rate": 5.221621509119521e-06, "loss": 17.3074, "step": 27426 }, { "epoch": 0.5013435209388195, "grad_norm": 6.600748489199835, "learning_rate": 5.221325788114884e-06, "loss": 17.7168, "step": 27427 }, { "epoch": 0.5013618001352661, "grad_norm": 6.278334865841477, "learning_rate": 5.22103006633452e-06, "loss": 17.583, "step": 27428 }, { "epoch": 0.5013800793317126, "grad_norm": 6.644632845121036, "learning_rate": 5.220734343779465e-06, "loss": 17.7029, "step": 27429 }, { "epoch": 0.5013983585281591, "grad_norm": 7.3252409032340005, "learning_rate": 5.220438620450754e-06, "loss": 17.3549, "step": 27430 }, { "epoch": 0.5014166377246057, "grad_norm": 5.853935514389421, "learning_rate": 5.220142896349424e-06, "loss": 17.3847, "step": 27431 }, { "epoch": 0.5014349169210521, "grad_norm": 4.588873686307592, "learning_rate": 5.219847171476515e-06, "loss": 16.8352, "step": 27432 }, { "epoch": 0.5014531961174987, "grad_norm": 6.392448853003408, "learning_rate": 5.2195514458330585e-06, "loss": 17.4353, "step": 27433 }, { "epoch": 0.5014714753139452, "grad_norm": 7.0629340071061835, "learning_rate": 5.219255719420095e-06, "loss": 18.0061, "step": 27434 }, { "epoch": 0.5014897545103917, "grad_norm": 9.441199465210085, "learning_rate": 5.218959992238658e-06, "loss": 17.6992, "step": 27435 }, { "epoch": 0.5015080337068383, "grad_norm": 6.942934271494722, "learning_rate": 5.218664264289786e-06, "loss": 17.6394, "step": 27436 }, { "epoch": 0.5015263129032848, "grad_norm": 6.086372515573157, "learning_rate": 5.2183685355745126e-06, "loss": 17.388, "step": 27437 }, { "epoch": 0.5015445920997313, "grad_norm": 6.790859367633346, "learning_rate": 5.218072806093879e-06, "loss": 17.5098, "step": 27438 }, { "epoch": 0.5015628712961778, "grad_norm": 6.377041254405387, "learning_rate": 5.217777075848918e-06, "loss": 17.4578, "step": 27439 }, { "epoch": 0.5015811504926243, "grad_norm": 8.609211270087672, "learning_rate": 5.217481344840667e-06, "loss": 17.4983, "step": 27440 }, { "epoch": 0.5015994296890709, "grad_norm": 5.416409304891662, "learning_rate": 5.217185613070164e-06, "loss": 17.1505, "step": 27441 }, { "epoch": 0.5016177088855174, "grad_norm": 6.184904764132652, "learning_rate": 5.2168898805384424e-06, "loss": 17.5776, "step": 27442 }, { "epoch": 0.501635988081964, "grad_norm": 6.135180400266057, "learning_rate": 5.216594147246543e-06, "loss": 17.5401, "step": 27443 }, { "epoch": 0.5016542672784104, "grad_norm": 7.028843924673126, "learning_rate": 5.216298413195497e-06, "loss": 17.506, "step": 27444 }, { "epoch": 0.5016725464748569, "grad_norm": 5.641817449703513, "learning_rate": 5.216002678386346e-06, "loss": 17.2133, "step": 27445 }, { "epoch": 0.5016908256713035, "grad_norm": 5.1560586955278405, "learning_rate": 5.215706942820124e-06, "loss": 17.0432, "step": 27446 }, { "epoch": 0.50170910486775, "grad_norm": 6.549285818228365, "learning_rate": 5.215411206497868e-06, "loss": 17.5745, "step": 27447 }, { "epoch": 0.5017273840641966, "grad_norm": 7.7095342333496815, "learning_rate": 5.215115469420614e-06, "loss": 18.2654, "step": 27448 }, { "epoch": 0.5017456632606431, "grad_norm": 5.941606628883112, "learning_rate": 5.214819731589398e-06, "loss": 17.1361, "step": 27449 }, { "epoch": 0.5017639424570896, "grad_norm": 7.8957610329637165, "learning_rate": 5.214523993005259e-06, "loss": 17.9532, "step": 27450 }, { "epoch": 0.5017822216535361, "grad_norm": 6.372576546619049, "learning_rate": 5.214228253669232e-06, "loss": 17.3834, "step": 27451 }, { "epoch": 0.5018005008499826, "grad_norm": 7.6097126215648245, "learning_rate": 5.213932513582353e-06, "loss": 18.1436, "step": 27452 }, { "epoch": 0.5018187800464292, "grad_norm": 5.852300587894222, "learning_rate": 5.2136367727456595e-06, "loss": 17.1411, "step": 27453 }, { "epoch": 0.5018370592428757, "grad_norm": 9.691714048624561, "learning_rate": 5.2133410311601875e-06, "loss": 18.9537, "step": 27454 }, { "epoch": 0.5018553384393222, "grad_norm": 7.197165582650849, "learning_rate": 5.2130452888269725e-06, "loss": 17.9285, "step": 27455 }, { "epoch": 0.5018736176357688, "grad_norm": 6.790190706934791, "learning_rate": 5.212749545747053e-06, "loss": 17.5413, "step": 27456 }, { "epoch": 0.5018918968322152, "grad_norm": 5.920475628504475, "learning_rate": 5.212453801921467e-06, "loss": 17.2972, "step": 27457 }, { "epoch": 0.5019101760286617, "grad_norm": 5.7450883295921695, "learning_rate": 5.2121580573512456e-06, "loss": 17.2121, "step": 27458 }, { "epoch": 0.5019284552251083, "grad_norm": 6.129887306500934, "learning_rate": 5.21186231203743e-06, "loss": 17.3798, "step": 27459 }, { "epoch": 0.5019467344215548, "grad_norm": 6.389799105005755, "learning_rate": 5.2115665659810555e-06, "loss": 17.4598, "step": 27460 }, { "epoch": 0.5019650136180014, "grad_norm": 7.487780545050386, "learning_rate": 5.211270819183159e-06, "loss": 17.9528, "step": 27461 }, { "epoch": 0.5019832928144479, "grad_norm": 6.545697454607325, "learning_rate": 5.210975071644776e-06, "loss": 17.2926, "step": 27462 }, { "epoch": 0.5020015720108943, "grad_norm": 5.724842236748844, "learning_rate": 5.210679323366943e-06, "loss": 17.2105, "step": 27463 }, { "epoch": 0.5020198512073409, "grad_norm": 5.7749935514895725, "learning_rate": 5.210383574350698e-06, "loss": 17.0421, "step": 27464 }, { "epoch": 0.5020381304037874, "grad_norm": 5.319072944307387, "learning_rate": 5.210087824597076e-06, "loss": 16.9669, "step": 27465 }, { "epoch": 0.502056409600234, "grad_norm": 6.916192023352253, "learning_rate": 5.209792074107116e-06, "loss": 17.661, "step": 27466 }, { "epoch": 0.5020746887966805, "grad_norm": 6.52839170697784, "learning_rate": 5.209496322881852e-06, "loss": 17.5856, "step": 27467 }, { "epoch": 0.502092967993127, "grad_norm": 5.7575835279806, "learning_rate": 5.209200570922322e-06, "loss": 17.2502, "step": 27468 }, { "epoch": 0.5021112471895736, "grad_norm": 5.21937368770072, "learning_rate": 5.208904818229561e-06, "loss": 17.2321, "step": 27469 }, { "epoch": 0.50212952638602, "grad_norm": 8.122590314842842, "learning_rate": 5.2086090648046096e-06, "loss": 18.061, "step": 27470 }, { "epoch": 0.5021478055824666, "grad_norm": 5.781673897216666, "learning_rate": 5.2083133106484986e-06, "loss": 17.1307, "step": 27471 }, { "epoch": 0.5021660847789131, "grad_norm": 6.9870143769618895, "learning_rate": 5.208017555762268e-06, "loss": 17.7875, "step": 27472 }, { "epoch": 0.5021843639753596, "grad_norm": 6.684202125132718, "learning_rate": 5.207721800146954e-06, "loss": 17.4909, "step": 27473 }, { "epoch": 0.5022026431718062, "grad_norm": 6.565595420450179, "learning_rate": 5.2074260438035954e-06, "loss": 17.5612, "step": 27474 }, { "epoch": 0.5022209223682527, "grad_norm": 6.897436635906595, "learning_rate": 5.207130286733224e-06, "loss": 17.6166, "step": 27475 }, { "epoch": 0.5022392015646993, "grad_norm": 6.051510521759804, "learning_rate": 5.206834528936878e-06, "loss": 17.5767, "step": 27476 }, { "epoch": 0.5022574807611457, "grad_norm": 4.660172711916454, "learning_rate": 5.206538770415598e-06, "loss": 16.8226, "step": 27477 }, { "epoch": 0.5022757599575922, "grad_norm": 5.502986954221638, "learning_rate": 5.206243011170415e-06, "loss": 17.023, "step": 27478 }, { "epoch": 0.5022940391540388, "grad_norm": 5.978417104720057, "learning_rate": 5.205947251202369e-06, "loss": 17.5388, "step": 27479 }, { "epoch": 0.5023123183504853, "grad_norm": 6.8248904751761765, "learning_rate": 5.205651490512496e-06, "loss": 17.613, "step": 27480 }, { "epoch": 0.5023305975469319, "grad_norm": 4.8724367239266, "learning_rate": 5.205355729101833e-06, "loss": 17.0065, "step": 27481 }, { "epoch": 0.5023488767433784, "grad_norm": 5.961520441985069, "learning_rate": 5.205059966971415e-06, "loss": 17.6608, "step": 27482 }, { "epoch": 0.5023671559398248, "grad_norm": 5.884050484409216, "learning_rate": 5.204764204122279e-06, "loss": 17.1868, "step": 27483 }, { "epoch": 0.5023854351362714, "grad_norm": 6.253560070917388, "learning_rate": 5.2044684405554645e-06, "loss": 17.4443, "step": 27484 }, { "epoch": 0.5024037143327179, "grad_norm": 8.408789749844162, "learning_rate": 5.204172676272003e-06, "loss": 18.3138, "step": 27485 }, { "epoch": 0.5024219935291645, "grad_norm": 6.558372316727946, "learning_rate": 5.203876911272936e-06, "loss": 17.6935, "step": 27486 }, { "epoch": 0.502440272725611, "grad_norm": 6.158160751081932, "learning_rate": 5.203581145559298e-06, "loss": 17.2859, "step": 27487 }, { "epoch": 0.5024585519220575, "grad_norm": 6.208609058868916, "learning_rate": 5.2032853791321255e-06, "loss": 17.3936, "step": 27488 }, { "epoch": 0.502476831118504, "grad_norm": 7.848355301758456, "learning_rate": 5.202989611992455e-06, "loss": 17.9317, "step": 27489 }, { "epoch": 0.5024951103149505, "grad_norm": 6.685016959708192, "learning_rate": 5.202693844141322e-06, "loss": 17.299, "step": 27490 }, { "epoch": 0.5025133895113971, "grad_norm": 5.288702752183807, "learning_rate": 5.202398075579767e-06, "loss": 17.035, "step": 27491 }, { "epoch": 0.5025316687078436, "grad_norm": 8.106273503780569, "learning_rate": 5.202102306308825e-06, "loss": 17.9173, "step": 27492 }, { "epoch": 0.5025499479042901, "grad_norm": 6.0398167216408885, "learning_rate": 5.2018065363295304e-06, "loss": 17.4087, "step": 27493 }, { "epoch": 0.5025682271007367, "grad_norm": 7.557718648557547, "learning_rate": 5.201510765642922e-06, "loss": 17.7971, "step": 27494 }, { "epoch": 0.5025865062971832, "grad_norm": 6.054882255550186, "learning_rate": 5.201214994250034e-06, "loss": 17.3972, "step": 27495 }, { "epoch": 0.5026047854936297, "grad_norm": 7.194471640590093, "learning_rate": 5.200919222151908e-06, "loss": 17.9227, "step": 27496 }, { "epoch": 0.5026230646900762, "grad_norm": 7.479360695694727, "learning_rate": 5.200623449349575e-06, "loss": 18.0845, "step": 27497 }, { "epoch": 0.5026413438865227, "grad_norm": 6.683254102833924, "learning_rate": 5.200327675844076e-06, "loss": 17.4318, "step": 27498 }, { "epoch": 0.5026596230829693, "grad_norm": 6.748717926325936, "learning_rate": 5.200031901636444e-06, "loss": 17.6456, "step": 27499 }, { "epoch": 0.5026779022794158, "grad_norm": 6.117365829160261, "learning_rate": 5.199736126727719e-06, "loss": 17.3438, "step": 27500 }, { "epoch": 0.5026961814758624, "grad_norm": 6.642969667594831, "learning_rate": 5.199440351118936e-06, "loss": 17.4521, "step": 27501 }, { "epoch": 0.5027144606723088, "grad_norm": 6.546327008119256, "learning_rate": 5.199144574811132e-06, "loss": 17.5044, "step": 27502 }, { "epoch": 0.5027327398687553, "grad_norm": 5.349614452347511, "learning_rate": 5.198848797805343e-06, "loss": 16.9952, "step": 27503 }, { "epoch": 0.5027510190652019, "grad_norm": 6.806322501820001, "learning_rate": 5.198553020102606e-06, "loss": 17.7437, "step": 27504 }, { "epoch": 0.5027692982616484, "grad_norm": 6.504506895365117, "learning_rate": 5.198257241703959e-06, "loss": 17.6166, "step": 27505 }, { "epoch": 0.502787577458095, "grad_norm": 6.8308968273401645, "learning_rate": 5.1979614626104365e-06, "loss": 17.7568, "step": 27506 }, { "epoch": 0.5028058566545415, "grad_norm": 6.600709319039463, "learning_rate": 5.197665682823076e-06, "loss": 17.5428, "step": 27507 }, { "epoch": 0.502824135850988, "grad_norm": 6.2470534123693895, "learning_rate": 5.197369902342916e-06, "loss": 17.3073, "step": 27508 }, { "epoch": 0.5028424150474345, "grad_norm": 6.976116280348958, "learning_rate": 5.197074121170991e-06, "loss": 17.7634, "step": 27509 }, { "epoch": 0.502860694243881, "grad_norm": 6.1875155221474625, "learning_rate": 5.196778339308338e-06, "loss": 17.3329, "step": 27510 }, { "epoch": 0.5028789734403276, "grad_norm": 6.46088522994846, "learning_rate": 5.196482556755994e-06, "loss": 17.599, "step": 27511 }, { "epoch": 0.5028972526367741, "grad_norm": 8.997033569978639, "learning_rate": 5.196186773514995e-06, "loss": 18.3507, "step": 27512 }, { "epoch": 0.5029155318332206, "grad_norm": 5.922053130112032, "learning_rate": 5.19589098958638e-06, "loss": 17.4276, "step": 27513 }, { "epoch": 0.5029338110296672, "grad_norm": 5.7468085657305625, "learning_rate": 5.195595204971182e-06, "loss": 17.3735, "step": 27514 }, { "epoch": 0.5029520902261136, "grad_norm": 6.405522064361517, "learning_rate": 5.195299419670442e-06, "loss": 17.8147, "step": 27515 }, { "epoch": 0.5029703694225602, "grad_norm": 7.037740565033126, "learning_rate": 5.195003633685194e-06, "loss": 17.6155, "step": 27516 }, { "epoch": 0.5029886486190067, "grad_norm": 5.7461921632343325, "learning_rate": 5.194707847016474e-06, "loss": 17.2436, "step": 27517 }, { "epoch": 0.5030069278154532, "grad_norm": 6.768562562646901, "learning_rate": 5.19441205966532e-06, "loss": 17.821, "step": 27518 }, { "epoch": 0.5030252070118998, "grad_norm": 6.994355393123101, "learning_rate": 5.194116271632769e-06, "loss": 17.6248, "step": 27519 }, { "epoch": 0.5030434862083463, "grad_norm": 5.945406372738656, "learning_rate": 5.193820482919858e-06, "loss": 17.2435, "step": 27520 }, { "epoch": 0.5030617654047929, "grad_norm": 5.974668663293852, "learning_rate": 5.193524693527623e-06, "loss": 17.3625, "step": 27521 }, { "epoch": 0.5030800446012393, "grad_norm": 6.9194982401268526, "learning_rate": 5.1932289034571e-06, "loss": 18.2391, "step": 27522 }, { "epoch": 0.5030983237976858, "grad_norm": 7.315381405321791, "learning_rate": 5.192933112709326e-06, "loss": 17.9562, "step": 27523 }, { "epoch": 0.5031166029941324, "grad_norm": 6.594120925526298, "learning_rate": 5.1926373212853385e-06, "loss": 17.5046, "step": 27524 }, { "epoch": 0.5031348821905789, "grad_norm": 7.304940661604103, "learning_rate": 5.192341529186175e-06, "loss": 17.7933, "step": 27525 }, { "epoch": 0.5031531613870254, "grad_norm": 5.483380648253276, "learning_rate": 5.19204573641287e-06, "loss": 17.1572, "step": 27526 }, { "epoch": 0.503171440583472, "grad_norm": 6.890729723949929, "learning_rate": 5.191749942966462e-06, "loss": 17.8273, "step": 27527 }, { "epoch": 0.5031897197799184, "grad_norm": 5.600074806366154, "learning_rate": 5.191454148847986e-06, "loss": 17.2443, "step": 27528 }, { "epoch": 0.503207998976365, "grad_norm": 5.512225238396698, "learning_rate": 5.191158354058482e-06, "loss": 17.1353, "step": 27529 }, { "epoch": 0.5032262781728115, "grad_norm": 6.097103260232165, "learning_rate": 5.190862558598983e-06, "loss": 17.3884, "step": 27530 }, { "epoch": 0.503244557369258, "grad_norm": 5.2372114164909735, "learning_rate": 5.190566762470527e-06, "loss": 16.9618, "step": 27531 }, { "epoch": 0.5032628365657046, "grad_norm": 7.533783967663297, "learning_rate": 5.190270965674152e-06, "loss": 18.1905, "step": 27532 }, { "epoch": 0.5032811157621511, "grad_norm": 5.798360449902278, "learning_rate": 5.189975168210893e-06, "loss": 17.3989, "step": 27533 }, { "epoch": 0.5032993949585977, "grad_norm": 6.640908870126543, "learning_rate": 5.189679370081789e-06, "loss": 17.4577, "step": 27534 }, { "epoch": 0.5033176741550441, "grad_norm": 5.080109637563325, "learning_rate": 5.189383571287872e-06, "loss": 16.9935, "step": 27535 }, { "epoch": 0.5033359533514906, "grad_norm": 6.912073437888923, "learning_rate": 5.189087771830186e-06, "loss": 17.6632, "step": 27536 }, { "epoch": 0.5033542325479372, "grad_norm": 6.575914356282273, "learning_rate": 5.188791971709761e-06, "loss": 17.4028, "step": 27537 }, { "epoch": 0.5033725117443837, "grad_norm": 5.540881853619029, "learning_rate": 5.188496170927637e-06, "loss": 17.2044, "step": 27538 }, { "epoch": 0.5033907909408303, "grad_norm": 7.219808193319258, "learning_rate": 5.1882003694848515e-06, "loss": 17.7435, "step": 27539 }, { "epoch": 0.5034090701372768, "grad_norm": 7.271662511375215, "learning_rate": 5.187904567382439e-06, "loss": 18.1021, "step": 27540 }, { "epoch": 0.5034273493337232, "grad_norm": 6.789408787706083, "learning_rate": 5.187608764621437e-06, "loss": 17.6288, "step": 27541 }, { "epoch": 0.5034456285301698, "grad_norm": 6.929807585433291, "learning_rate": 5.187312961202882e-06, "loss": 17.5508, "step": 27542 }, { "epoch": 0.5034639077266163, "grad_norm": 5.757440779569821, "learning_rate": 5.187017157127815e-06, "loss": 17.2604, "step": 27543 }, { "epoch": 0.5034821869230629, "grad_norm": 6.736364591735982, "learning_rate": 5.186721352397265e-06, "loss": 17.5449, "step": 27544 }, { "epoch": 0.5035004661195094, "grad_norm": 5.680884503028648, "learning_rate": 5.186425547012275e-06, "loss": 17.0263, "step": 27545 }, { "epoch": 0.5035187453159559, "grad_norm": 4.685044570264193, "learning_rate": 5.18612974097388e-06, "loss": 16.8804, "step": 27546 }, { "epoch": 0.5035370245124025, "grad_norm": 7.224721385841569, "learning_rate": 5.185833934283114e-06, "loss": 17.9704, "step": 27547 }, { "epoch": 0.5035553037088489, "grad_norm": 6.933162705042278, "learning_rate": 5.185538126941019e-06, "loss": 17.9275, "step": 27548 }, { "epoch": 0.5035735829052955, "grad_norm": 5.864375061142779, "learning_rate": 5.1852423189486256e-06, "loss": 17.6488, "step": 27549 }, { "epoch": 0.503591862101742, "grad_norm": 7.11665122116013, "learning_rate": 5.184946510306977e-06, "loss": 17.5661, "step": 27550 }, { "epoch": 0.5036101412981885, "grad_norm": 6.304113181307043, "learning_rate": 5.184650701017105e-06, "loss": 17.5131, "step": 27551 }, { "epoch": 0.5036284204946351, "grad_norm": 5.737618692572437, "learning_rate": 5.18435489108005e-06, "loss": 16.9711, "step": 27552 }, { "epoch": 0.5036466996910816, "grad_norm": 5.700244822219272, "learning_rate": 5.184059080496846e-06, "loss": 17.1295, "step": 27553 }, { "epoch": 0.5036649788875281, "grad_norm": 5.862370704788984, "learning_rate": 5.183763269268531e-06, "loss": 17.1625, "step": 27554 }, { "epoch": 0.5036832580839746, "grad_norm": 5.909596797048763, "learning_rate": 5.183467457396142e-06, "loss": 17.1662, "step": 27555 }, { "epoch": 0.5037015372804211, "grad_norm": 8.056279847860345, "learning_rate": 5.183171644880714e-06, "loss": 17.7257, "step": 27556 }, { "epoch": 0.5037198164768677, "grad_norm": 6.257912896267873, "learning_rate": 5.182875831723288e-06, "loss": 17.5899, "step": 27557 }, { "epoch": 0.5037380956733142, "grad_norm": 7.619659666477824, "learning_rate": 5.1825800179248964e-06, "loss": 17.6511, "step": 27558 }, { "epoch": 0.5037563748697608, "grad_norm": 8.25092517858032, "learning_rate": 5.182284203486577e-06, "loss": 18.1767, "step": 27559 }, { "epoch": 0.5037746540662073, "grad_norm": 5.824274287405943, "learning_rate": 5.1819883884093705e-06, "loss": 17.31, "step": 27560 }, { "epoch": 0.5037929332626537, "grad_norm": 6.923456020475565, "learning_rate": 5.181692572694308e-06, "loss": 17.7274, "step": 27561 }, { "epoch": 0.5038112124591003, "grad_norm": 7.702178478700586, "learning_rate": 5.181396756342428e-06, "loss": 17.6612, "step": 27562 }, { "epoch": 0.5038294916555468, "grad_norm": 5.526224845757836, "learning_rate": 5.18110093935477e-06, "loss": 17.2442, "step": 27563 }, { "epoch": 0.5038477708519934, "grad_norm": 5.925765686166306, "learning_rate": 5.180805121732367e-06, "loss": 17.246, "step": 27564 }, { "epoch": 0.5038660500484399, "grad_norm": 6.314698857761973, "learning_rate": 5.18050930347626e-06, "loss": 17.4263, "step": 27565 }, { "epoch": 0.5038843292448864, "grad_norm": 6.400760567150828, "learning_rate": 5.180213484587482e-06, "loss": 17.4009, "step": 27566 }, { "epoch": 0.5039026084413329, "grad_norm": 6.066075711125857, "learning_rate": 5.179917665067072e-06, "loss": 17.5616, "step": 27567 }, { "epoch": 0.5039208876377794, "grad_norm": 6.74359468662774, "learning_rate": 5.179621844916065e-06, "loss": 17.6614, "step": 27568 }, { "epoch": 0.503939166834226, "grad_norm": 6.5324995219543736, "learning_rate": 5.1793260241355e-06, "loss": 17.2444, "step": 27569 }, { "epoch": 0.5039574460306725, "grad_norm": 6.916770101727999, "learning_rate": 5.179030202726414e-06, "loss": 18.1773, "step": 27570 }, { "epoch": 0.503975725227119, "grad_norm": 5.983715099934033, "learning_rate": 5.17873438068984e-06, "loss": 17.2402, "step": 27571 }, { "epoch": 0.5039940044235656, "grad_norm": 5.256466208380804, "learning_rate": 5.178438558026819e-06, "loss": 17.3271, "step": 27572 }, { "epoch": 0.504012283620012, "grad_norm": 4.865510599169104, "learning_rate": 5.178142734738386e-06, "loss": 16.9278, "step": 27573 }, { "epoch": 0.5040305628164586, "grad_norm": 6.009582623410873, "learning_rate": 5.17784691082558e-06, "loss": 17.3797, "step": 27574 }, { "epoch": 0.5040488420129051, "grad_norm": 6.029082880362614, "learning_rate": 5.177551086289434e-06, "loss": 17.3101, "step": 27575 }, { "epoch": 0.5040671212093516, "grad_norm": 6.645481189205907, "learning_rate": 5.177255261130987e-06, "loss": 17.4678, "step": 27576 }, { "epoch": 0.5040854004057982, "grad_norm": 6.868423411372092, "learning_rate": 5.1769594353512765e-06, "loss": 17.8212, "step": 27577 }, { "epoch": 0.5041036796022447, "grad_norm": 6.7974656322543865, "learning_rate": 5.1766636089513375e-06, "loss": 17.5799, "step": 27578 }, { "epoch": 0.5041219587986913, "grad_norm": 9.202097818861546, "learning_rate": 5.176367781932209e-06, "loss": 17.9019, "step": 27579 }, { "epoch": 0.5041402379951377, "grad_norm": 6.546695558542694, "learning_rate": 5.176071954294926e-06, "loss": 17.2939, "step": 27580 }, { "epoch": 0.5041585171915842, "grad_norm": 4.114361230779656, "learning_rate": 5.175776126040526e-06, "loss": 16.5922, "step": 27581 }, { "epoch": 0.5041767963880308, "grad_norm": 5.446068971978248, "learning_rate": 5.175480297170047e-06, "loss": 17.1501, "step": 27582 }, { "epoch": 0.5041950755844773, "grad_norm": 6.528465725908221, "learning_rate": 5.1751844676845234e-06, "loss": 17.6874, "step": 27583 }, { "epoch": 0.5042133547809239, "grad_norm": 6.263069703194509, "learning_rate": 5.174888637584995e-06, "loss": 17.3593, "step": 27584 }, { "epoch": 0.5042316339773704, "grad_norm": 7.089100425270701, "learning_rate": 5.174592806872495e-06, "loss": 17.7325, "step": 27585 }, { "epoch": 0.5042499131738168, "grad_norm": 6.952406788040247, "learning_rate": 5.174296975548063e-06, "loss": 17.8632, "step": 27586 }, { "epoch": 0.5042681923702634, "grad_norm": 6.702582100217042, "learning_rate": 5.1740011436127355e-06, "loss": 17.5542, "step": 27587 }, { "epoch": 0.5042864715667099, "grad_norm": 6.415982961407396, "learning_rate": 5.1737053110675505e-06, "loss": 17.8005, "step": 27588 }, { "epoch": 0.5043047507631565, "grad_norm": 6.657096792187528, "learning_rate": 5.173409477913543e-06, "loss": 17.6757, "step": 27589 }, { "epoch": 0.504323029959603, "grad_norm": 7.168338115829753, "learning_rate": 5.173113644151748e-06, "loss": 17.7773, "step": 27590 }, { "epoch": 0.5043413091560495, "grad_norm": 4.892969233631025, "learning_rate": 5.172817809783207e-06, "loss": 16.8434, "step": 27591 }, { "epoch": 0.5043595883524961, "grad_norm": 5.473425059303431, "learning_rate": 5.172521974808954e-06, "loss": 17.2269, "step": 27592 }, { "epoch": 0.5043778675489425, "grad_norm": 6.62752555391626, "learning_rate": 5.1722261392300265e-06, "loss": 17.6366, "step": 27593 }, { "epoch": 0.504396146745389, "grad_norm": 5.984820671045974, "learning_rate": 5.171930303047461e-06, "loss": 17.1321, "step": 27594 }, { "epoch": 0.5044144259418356, "grad_norm": 7.448722269027972, "learning_rate": 5.171634466262294e-06, "loss": 17.9497, "step": 27595 }, { "epoch": 0.5044327051382821, "grad_norm": 5.5853500569532955, "learning_rate": 5.171338628875564e-06, "loss": 17.2253, "step": 27596 }, { "epoch": 0.5044509843347287, "grad_norm": 6.771563707319088, "learning_rate": 5.171042790888308e-06, "loss": 17.549, "step": 27597 }, { "epoch": 0.5044692635311752, "grad_norm": 5.871899978894757, "learning_rate": 5.170746952301559e-06, "loss": 17.4473, "step": 27598 }, { "epoch": 0.5044875427276216, "grad_norm": 6.274713350608566, "learning_rate": 5.170451113116359e-06, "loss": 17.7429, "step": 27599 }, { "epoch": 0.5045058219240682, "grad_norm": 7.110533040292356, "learning_rate": 5.170155273333743e-06, "loss": 17.9841, "step": 27600 }, { "epoch": 0.5045241011205147, "grad_norm": 7.803695683487732, "learning_rate": 5.169859432954747e-06, "loss": 18.2242, "step": 27601 }, { "epoch": 0.5045423803169613, "grad_norm": 5.916541655408195, "learning_rate": 5.169563591980409e-06, "loss": 17.0129, "step": 27602 }, { "epoch": 0.5045606595134078, "grad_norm": 6.182438632464134, "learning_rate": 5.169267750411763e-06, "loss": 17.5817, "step": 27603 }, { "epoch": 0.5045789387098543, "grad_norm": 5.900076900146689, "learning_rate": 5.1689719082498494e-06, "loss": 17.128, "step": 27604 }, { "epoch": 0.5045972179063009, "grad_norm": 7.172817506058974, "learning_rate": 5.168676065495705e-06, "loss": 17.5771, "step": 27605 }, { "epoch": 0.5046154971027473, "grad_norm": 7.6274939751249615, "learning_rate": 5.168380222150364e-06, "loss": 17.8769, "step": 27606 }, { "epoch": 0.5046337762991939, "grad_norm": 5.404809349830576, "learning_rate": 5.1680843782148656e-06, "loss": 17.0407, "step": 27607 }, { "epoch": 0.5046520554956404, "grad_norm": 7.394237278623086, "learning_rate": 5.167788533690247e-06, "loss": 17.8788, "step": 27608 }, { "epoch": 0.5046703346920869, "grad_norm": 7.492467063448779, "learning_rate": 5.167492688577543e-06, "loss": 17.9748, "step": 27609 }, { "epoch": 0.5046886138885335, "grad_norm": 6.8790718888237, "learning_rate": 5.167196842877792e-06, "loss": 17.5047, "step": 27610 }, { "epoch": 0.50470689308498, "grad_norm": 6.597736836157785, "learning_rate": 5.1669009965920305e-06, "loss": 17.8067, "step": 27611 }, { "epoch": 0.5047251722814265, "grad_norm": 5.16417093993006, "learning_rate": 5.166605149721296e-06, "loss": 16.864, "step": 27612 }, { "epoch": 0.504743451477873, "grad_norm": 6.790951932401917, "learning_rate": 5.166309302266624e-06, "loss": 17.5408, "step": 27613 }, { "epoch": 0.5047617306743195, "grad_norm": 7.320218145959628, "learning_rate": 5.166013454229053e-06, "loss": 17.9132, "step": 27614 }, { "epoch": 0.5047800098707661, "grad_norm": 5.540957011762305, "learning_rate": 5.16571760560962e-06, "loss": 17.3871, "step": 27615 }, { "epoch": 0.5047982890672126, "grad_norm": 7.778922745046559, "learning_rate": 5.16542175640936e-06, "loss": 18.0185, "step": 27616 }, { "epoch": 0.5048165682636592, "grad_norm": 6.193097886338422, "learning_rate": 5.16512590662931e-06, "loss": 17.3566, "step": 27617 }, { "epoch": 0.5048348474601057, "grad_norm": 5.811583558003684, "learning_rate": 5.164830056270509e-06, "loss": 17.2262, "step": 27618 }, { "epoch": 0.5048531266565521, "grad_norm": 7.898633495034506, "learning_rate": 5.164534205333995e-06, "loss": 18.3214, "step": 27619 }, { "epoch": 0.5048714058529987, "grad_norm": 8.030837492772608, "learning_rate": 5.1642383538208005e-06, "loss": 18.3775, "step": 27620 }, { "epoch": 0.5048896850494452, "grad_norm": 5.723929271515542, "learning_rate": 5.163942501731966e-06, "loss": 17.4789, "step": 27621 }, { "epoch": 0.5049079642458918, "grad_norm": 5.230468112841281, "learning_rate": 5.163646649068527e-06, "loss": 16.9984, "step": 27622 }, { "epoch": 0.5049262434423383, "grad_norm": 6.128025454370273, "learning_rate": 5.16335079583152e-06, "loss": 17.399, "step": 27623 }, { "epoch": 0.5049445226387848, "grad_norm": 5.866331204761387, "learning_rate": 5.163054942021983e-06, "loss": 17.6067, "step": 27624 }, { "epoch": 0.5049628018352313, "grad_norm": 5.760854215968586, "learning_rate": 5.162759087640953e-06, "loss": 17.3237, "step": 27625 }, { "epoch": 0.5049810810316778, "grad_norm": 7.015165463723453, "learning_rate": 5.162463232689465e-06, "loss": 18.1369, "step": 27626 }, { "epoch": 0.5049993602281244, "grad_norm": 6.416469001773678, "learning_rate": 5.162167377168559e-06, "loss": 17.7597, "step": 27627 }, { "epoch": 0.5050176394245709, "grad_norm": 6.994677546386976, "learning_rate": 5.1618715210792704e-06, "loss": 18.0055, "step": 27628 }, { "epoch": 0.5050359186210174, "grad_norm": 7.063157435887147, "learning_rate": 5.161575664422637e-06, "loss": 17.8699, "step": 27629 }, { "epoch": 0.505054197817464, "grad_norm": 7.2920732637401215, "learning_rate": 5.161279807199692e-06, "loss": 18.198, "step": 27630 }, { "epoch": 0.5050724770139104, "grad_norm": 5.055173979027419, "learning_rate": 5.160983949411478e-06, "loss": 16.9205, "step": 27631 }, { "epoch": 0.505090756210357, "grad_norm": 5.647088359036415, "learning_rate": 5.1606880910590285e-06, "loss": 17.2178, "step": 27632 }, { "epoch": 0.5051090354068035, "grad_norm": 5.928155576463627, "learning_rate": 5.160392232143381e-06, "loss": 17.5289, "step": 27633 }, { "epoch": 0.50512731460325, "grad_norm": 6.396122099132778, "learning_rate": 5.160096372665573e-06, "loss": 17.3092, "step": 27634 }, { "epoch": 0.5051455937996966, "grad_norm": 5.2811810268772446, "learning_rate": 5.1598005126266395e-06, "loss": 17.0258, "step": 27635 }, { "epoch": 0.5051638729961431, "grad_norm": 6.033285483416313, "learning_rate": 5.159504652027621e-06, "loss": 17.4595, "step": 27636 }, { "epoch": 0.5051821521925897, "grad_norm": 6.461835547461277, "learning_rate": 5.159208790869552e-06, "loss": 17.6768, "step": 27637 }, { "epoch": 0.5052004313890361, "grad_norm": 6.103894956407447, "learning_rate": 5.158912929153469e-06, "loss": 17.0627, "step": 27638 }, { "epoch": 0.5052187105854826, "grad_norm": 5.478906843384525, "learning_rate": 5.158617066880411e-06, "loss": 16.9255, "step": 27639 }, { "epoch": 0.5052369897819292, "grad_norm": 7.640017289504977, "learning_rate": 5.158321204051414e-06, "loss": 17.9303, "step": 27640 }, { "epoch": 0.5052552689783757, "grad_norm": 8.514597003385521, "learning_rate": 5.158025340667514e-06, "loss": 18.0154, "step": 27641 }, { "epoch": 0.5052735481748223, "grad_norm": 5.810975657165897, "learning_rate": 5.157729476729749e-06, "loss": 17.0998, "step": 27642 }, { "epoch": 0.5052918273712688, "grad_norm": 5.553983838062384, "learning_rate": 5.1574336122391575e-06, "loss": 17.1545, "step": 27643 }, { "epoch": 0.5053101065677152, "grad_norm": 5.185153497954882, "learning_rate": 5.157137747196773e-06, "loss": 17.1105, "step": 27644 }, { "epoch": 0.5053283857641618, "grad_norm": 7.312979692958259, "learning_rate": 5.156841881603635e-06, "loss": 17.5566, "step": 27645 }, { "epoch": 0.5053466649606083, "grad_norm": 7.308497903257434, "learning_rate": 5.156546015460782e-06, "loss": 18.2654, "step": 27646 }, { "epoch": 0.5053649441570549, "grad_norm": 6.652847675639543, "learning_rate": 5.156250148769247e-06, "loss": 17.6146, "step": 27647 }, { "epoch": 0.5053832233535014, "grad_norm": 6.041361608402879, "learning_rate": 5.155954281530066e-06, "loss": 17.3519, "step": 27648 }, { "epoch": 0.5054015025499479, "grad_norm": 6.277524567147896, "learning_rate": 5.155658413744281e-06, "loss": 17.147, "step": 27649 }, { "epoch": 0.5054197817463945, "grad_norm": 5.30187731898524, "learning_rate": 5.155362545412928e-06, "loss": 17.1637, "step": 27650 }, { "epoch": 0.5054380609428409, "grad_norm": 6.06369297579374, "learning_rate": 5.1550666765370416e-06, "loss": 17.3441, "step": 27651 }, { "epoch": 0.5054563401392875, "grad_norm": 6.125803243534805, "learning_rate": 5.15477080711766e-06, "loss": 17.4234, "step": 27652 }, { "epoch": 0.505474619335734, "grad_norm": 6.902841414799843, "learning_rate": 5.1544749371558214e-06, "loss": 17.7937, "step": 27653 }, { "epoch": 0.5054928985321805, "grad_norm": 9.121673690437543, "learning_rate": 5.154179066652559e-06, "loss": 17.8539, "step": 27654 }, { "epoch": 0.5055111777286271, "grad_norm": 5.513348411451003, "learning_rate": 5.153883195608914e-06, "loss": 17.23, "step": 27655 }, { "epoch": 0.5055294569250736, "grad_norm": 7.644708440959603, "learning_rate": 5.153587324025921e-06, "loss": 18.2627, "step": 27656 }, { "epoch": 0.5055477361215202, "grad_norm": 5.872963130275385, "learning_rate": 5.153291451904621e-06, "loss": 17.2168, "step": 27657 }, { "epoch": 0.5055660153179666, "grad_norm": 6.047861589930542, "learning_rate": 5.1529955792460425e-06, "loss": 17.3844, "step": 27658 }, { "epoch": 0.5055842945144131, "grad_norm": 6.697298712603379, "learning_rate": 5.1526997060512305e-06, "loss": 17.7653, "step": 27659 }, { "epoch": 0.5056025737108597, "grad_norm": 5.831644306619482, "learning_rate": 5.1524038323212215e-06, "loss": 17.4758, "step": 27660 }, { "epoch": 0.5056208529073062, "grad_norm": 7.811983398180582, "learning_rate": 5.1521079580570464e-06, "loss": 17.9186, "step": 27661 }, { "epoch": 0.5056391321037527, "grad_norm": 8.365875514911632, "learning_rate": 5.151812083259747e-06, "loss": 18.0911, "step": 27662 }, { "epoch": 0.5056574113001993, "grad_norm": 7.142597196886949, "learning_rate": 5.151516207930361e-06, "loss": 17.9224, "step": 27663 }, { "epoch": 0.5056756904966457, "grad_norm": 7.144160869364189, "learning_rate": 5.151220332069923e-06, "loss": 17.8455, "step": 27664 }, { "epoch": 0.5056939696930923, "grad_norm": 5.225125562203611, "learning_rate": 5.15092445567947e-06, "loss": 17.0537, "step": 27665 }, { "epoch": 0.5057122488895388, "grad_norm": 6.164546195115452, "learning_rate": 5.1506285787600405e-06, "loss": 17.3785, "step": 27666 }, { "epoch": 0.5057305280859853, "grad_norm": 5.844485109282541, "learning_rate": 5.150332701312672e-06, "loss": 17.5462, "step": 27667 }, { "epoch": 0.5057488072824319, "grad_norm": 5.7195059066051215, "learning_rate": 5.150036823338399e-06, "loss": 17.2419, "step": 27668 }, { "epoch": 0.5057670864788784, "grad_norm": 7.8681767607495745, "learning_rate": 5.1497409448382605e-06, "loss": 17.7197, "step": 27669 }, { "epoch": 0.505785365675325, "grad_norm": 3.9105518559744006, "learning_rate": 5.149445065813294e-06, "loss": 16.5977, "step": 27670 }, { "epoch": 0.5058036448717714, "grad_norm": 6.170131995520045, "learning_rate": 5.1491491862645325e-06, "loss": 17.2897, "step": 27671 }, { "epoch": 0.5058219240682179, "grad_norm": 5.1358430281321175, "learning_rate": 5.148853306193018e-06, "loss": 17.7214, "step": 27672 }, { "epoch": 0.5058402032646645, "grad_norm": 5.856023897165747, "learning_rate": 5.148557425599786e-06, "loss": 17.4302, "step": 27673 }, { "epoch": 0.505858482461111, "grad_norm": 6.09834731008439, "learning_rate": 5.148261544485873e-06, "loss": 17.6248, "step": 27674 }, { "epoch": 0.5058767616575576, "grad_norm": 5.32953497411772, "learning_rate": 5.1479656628523166e-06, "loss": 16.9663, "step": 27675 }, { "epoch": 0.505895040854004, "grad_norm": 5.791288210493317, "learning_rate": 5.147669780700151e-06, "loss": 17.4501, "step": 27676 }, { "epoch": 0.5059133200504505, "grad_norm": 4.674975822255367, "learning_rate": 5.147373898030419e-06, "loss": 17.5492, "step": 27677 }, { "epoch": 0.5059315992468971, "grad_norm": 6.4795540493638315, "learning_rate": 5.147078014844152e-06, "loss": 17.4952, "step": 27678 }, { "epoch": 0.5059498784433436, "grad_norm": 5.87615287630539, "learning_rate": 5.14678213114239e-06, "loss": 17.3138, "step": 27679 }, { "epoch": 0.5059681576397902, "grad_norm": 6.823351620249338, "learning_rate": 5.146486246926169e-06, "loss": 17.7274, "step": 27680 }, { "epoch": 0.5059864368362367, "grad_norm": 6.636215937774113, "learning_rate": 5.1461903621965256e-06, "loss": 17.8749, "step": 27681 }, { "epoch": 0.5060047160326832, "grad_norm": 5.570020480201589, "learning_rate": 5.145894476954499e-06, "loss": 17.1821, "step": 27682 }, { "epoch": 0.5060229952291297, "grad_norm": 6.308932897503492, "learning_rate": 5.145598591201124e-06, "loss": 17.7802, "step": 27683 }, { "epoch": 0.5060412744255762, "grad_norm": 7.644739061584473, "learning_rate": 5.14530270493744e-06, "loss": 17.6652, "step": 27684 }, { "epoch": 0.5060595536220228, "grad_norm": 5.5404416303762725, "learning_rate": 5.145006818164482e-06, "loss": 17.2558, "step": 27685 }, { "epoch": 0.5060778328184693, "grad_norm": 5.951095898646831, "learning_rate": 5.1447109308832865e-06, "loss": 17.2144, "step": 27686 }, { "epoch": 0.5060961120149158, "grad_norm": 5.955982864622808, "learning_rate": 5.144415043094892e-06, "loss": 17.585, "step": 27687 }, { "epoch": 0.5061143912113624, "grad_norm": 6.180952227977285, "learning_rate": 5.144119154800338e-06, "loss": 17.5469, "step": 27688 }, { "epoch": 0.5061326704078088, "grad_norm": 7.116905280800295, "learning_rate": 5.143823266000657e-06, "loss": 17.9486, "step": 27689 }, { "epoch": 0.5061509496042554, "grad_norm": 6.0854189530902625, "learning_rate": 5.143527376696886e-06, "loss": 17.3287, "step": 27690 }, { "epoch": 0.5061692288007019, "grad_norm": 5.9056177128777305, "learning_rate": 5.1432314868900675e-06, "loss": 17.273, "step": 27691 }, { "epoch": 0.5061875079971484, "grad_norm": 7.1257202352464475, "learning_rate": 5.1429355965812335e-06, "loss": 17.7816, "step": 27692 }, { "epoch": 0.506205787193595, "grad_norm": 4.9526104789537975, "learning_rate": 5.142639705771422e-06, "loss": 16.9168, "step": 27693 }, { "epoch": 0.5062240663900415, "grad_norm": 6.925540441649807, "learning_rate": 5.142343814461671e-06, "loss": 18.026, "step": 27694 }, { "epoch": 0.5062423455864881, "grad_norm": 5.057242785752675, "learning_rate": 5.1420479226530176e-06, "loss": 16.9973, "step": 27695 }, { "epoch": 0.5062606247829345, "grad_norm": 5.298186320413527, "learning_rate": 5.141752030346499e-06, "loss": 17.0309, "step": 27696 }, { "epoch": 0.506278903979381, "grad_norm": 6.590194470556828, "learning_rate": 5.141456137543151e-06, "loss": 17.6833, "step": 27697 }, { "epoch": 0.5062971831758276, "grad_norm": 6.984955925175951, "learning_rate": 5.141160244244011e-06, "loss": 17.9011, "step": 27698 }, { "epoch": 0.5063154623722741, "grad_norm": 4.831846667844356, "learning_rate": 5.140864350450117e-06, "loss": 16.9374, "step": 27699 }, { "epoch": 0.5063337415687207, "grad_norm": 6.681990776830355, "learning_rate": 5.140568456162507e-06, "loss": 17.773, "step": 27700 }, { "epoch": 0.5063520207651672, "grad_norm": 7.553439780878484, "learning_rate": 5.1402725613822165e-06, "loss": 18.0232, "step": 27701 }, { "epoch": 0.5063702999616136, "grad_norm": 7.438364030658903, "learning_rate": 5.139976666110283e-06, "loss": 17.9461, "step": 27702 }, { "epoch": 0.5063885791580602, "grad_norm": 5.842131301653922, "learning_rate": 5.139680770347741e-06, "loss": 17.4337, "step": 27703 }, { "epoch": 0.5064068583545067, "grad_norm": 6.235689119762629, "learning_rate": 5.139384874095631e-06, "loss": 17.5769, "step": 27704 }, { "epoch": 0.5064251375509533, "grad_norm": 6.5957393003374305, "learning_rate": 5.139088977354991e-06, "loss": 17.4528, "step": 27705 }, { "epoch": 0.5064434167473998, "grad_norm": 8.876219451068485, "learning_rate": 5.138793080126855e-06, "loss": 17.9884, "step": 27706 }, { "epoch": 0.5064616959438463, "grad_norm": 6.536162880783229, "learning_rate": 5.138497182412261e-06, "loss": 17.8436, "step": 27707 }, { "epoch": 0.5064799751402929, "grad_norm": 5.346922379373697, "learning_rate": 5.138201284212246e-06, "loss": 17.1215, "step": 27708 }, { "epoch": 0.5064982543367393, "grad_norm": 6.920789085254473, "learning_rate": 5.13790538552785e-06, "loss": 18.0832, "step": 27709 }, { "epoch": 0.5065165335331859, "grad_norm": 5.38868080758509, "learning_rate": 5.137609486360105e-06, "loss": 17.1127, "step": 27710 }, { "epoch": 0.5065348127296324, "grad_norm": 7.3354816946597525, "learning_rate": 5.137313586710051e-06, "loss": 17.5267, "step": 27711 }, { "epoch": 0.5065530919260789, "grad_norm": 5.713671445638332, "learning_rate": 5.137017686578724e-06, "loss": 17.0097, "step": 27712 }, { "epoch": 0.5065713711225255, "grad_norm": 7.486510099193616, "learning_rate": 5.136721785967165e-06, "loss": 17.7021, "step": 27713 }, { "epoch": 0.506589650318972, "grad_norm": 5.746439996288859, "learning_rate": 5.136425884876405e-06, "loss": 17.2224, "step": 27714 }, { "epoch": 0.5066079295154186, "grad_norm": 7.3379061579999965, "learning_rate": 5.136129983307486e-06, "loss": 17.9784, "step": 27715 }, { "epoch": 0.506626208711865, "grad_norm": 6.008121010187761, "learning_rate": 5.135834081261443e-06, "loss": 17.6975, "step": 27716 }, { "epoch": 0.5066444879083115, "grad_norm": 7.21445062747721, "learning_rate": 5.135538178739311e-06, "loss": 17.6537, "step": 27717 }, { "epoch": 0.5066627671047581, "grad_norm": 5.362687134513164, "learning_rate": 5.135242275742132e-06, "loss": 17.1599, "step": 27718 }, { "epoch": 0.5066810463012046, "grad_norm": 7.813650131621309, "learning_rate": 5.13494637227094e-06, "loss": 18.3504, "step": 27719 }, { "epoch": 0.5066993254976512, "grad_norm": 6.092689226364962, "learning_rate": 5.134650468326773e-06, "loss": 17.5533, "step": 27720 }, { "epoch": 0.5067176046940977, "grad_norm": 7.587971222214896, "learning_rate": 5.134354563910667e-06, "loss": 18.0693, "step": 27721 }, { "epoch": 0.5067358838905441, "grad_norm": 6.330113621846811, "learning_rate": 5.134058659023661e-06, "loss": 17.2376, "step": 27722 }, { "epoch": 0.5067541630869907, "grad_norm": 6.32979980405475, "learning_rate": 5.133762753666789e-06, "loss": 17.7372, "step": 27723 }, { "epoch": 0.5067724422834372, "grad_norm": 5.110404489032517, "learning_rate": 5.1334668478410925e-06, "loss": 16.9661, "step": 27724 }, { "epoch": 0.5067907214798838, "grad_norm": 6.623456977007643, "learning_rate": 5.133170941547604e-06, "loss": 17.2745, "step": 27725 }, { "epoch": 0.5068090006763303, "grad_norm": 8.008242050881773, "learning_rate": 5.132875034787365e-06, "loss": 18.2314, "step": 27726 }, { "epoch": 0.5068272798727768, "grad_norm": 5.7253805568384895, "learning_rate": 5.132579127561409e-06, "loss": 17.1657, "step": 27727 }, { "epoch": 0.5068455590692234, "grad_norm": 9.219274905737294, "learning_rate": 5.132283219870775e-06, "loss": 18.0161, "step": 27728 }, { "epoch": 0.5068638382656698, "grad_norm": 7.70803399074323, "learning_rate": 5.1319873117165005e-06, "loss": 17.7997, "step": 27729 }, { "epoch": 0.5068821174621163, "grad_norm": 6.875985284577539, "learning_rate": 5.131691403099621e-06, "loss": 17.5223, "step": 27730 }, { "epoch": 0.5069003966585629, "grad_norm": 5.026856926452195, "learning_rate": 5.1313954940211755e-06, "loss": 17.085, "step": 27731 }, { "epoch": 0.5069186758550094, "grad_norm": 6.312322182507129, "learning_rate": 5.1310995844822e-06, "loss": 17.629, "step": 27732 }, { "epoch": 0.506936955051456, "grad_norm": 5.544649978169023, "learning_rate": 5.130803674483732e-06, "loss": 17.2875, "step": 27733 }, { "epoch": 0.5069552342479025, "grad_norm": 6.679378183058156, "learning_rate": 5.130507764026808e-06, "loss": 17.8269, "step": 27734 }, { "epoch": 0.5069735134443489, "grad_norm": 6.886195116724657, "learning_rate": 5.130211853112463e-06, "loss": 17.8946, "step": 27735 }, { "epoch": 0.5069917926407955, "grad_norm": 5.442584452922059, "learning_rate": 5.129915941741741e-06, "loss": 17.1328, "step": 27736 }, { "epoch": 0.507010071837242, "grad_norm": 6.613305630433648, "learning_rate": 5.129620029915674e-06, "loss": 17.8886, "step": 27737 }, { "epoch": 0.5070283510336886, "grad_norm": 6.267162436401069, "learning_rate": 5.1293241176353e-06, "loss": 17.4957, "step": 27738 }, { "epoch": 0.5070466302301351, "grad_norm": 5.707869374733119, "learning_rate": 5.129028204901654e-06, "loss": 17.1091, "step": 27739 }, { "epoch": 0.5070649094265816, "grad_norm": 6.481475256922889, "learning_rate": 5.128732291715777e-06, "loss": 17.533, "step": 27740 }, { "epoch": 0.5070831886230281, "grad_norm": 5.59205507786949, "learning_rate": 5.128436378078704e-06, "loss": 17.2637, "step": 27741 }, { "epoch": 0.5071014678194746, "grad_norm": 8.04507234687692, "learning_rate": 5.128140463991473e-06, "loss": 18.3353, "step": 27742 }, { "epoch": 0.5071197470159212, "grad_norm": 5.860938477511177, "learning_rate": 5.127844549455122e-06, "loss": 17.3962, "step": 27743 }, { "epoch": 0.5071380262123677, "grad_norm": 6.096136693855123, "learning_rate": 5.127548634470685e-06, "loss": 17.3681, "step": 27744 }, { "epoch": 0.5071563054088142, "grad_norm": 6.991027932920232, "learning_rate": 5.127252719039202e-06, "loss": 17.618, "step": 27745 }, { "epoch": 0.5071745846052608, "grad_norm": 6.212866849705913, "learning_rate": 5.126956803161709e-06, "loss": 17.4926, "step": 27746 }, { "epoch": 0.5071928638017072, "grad_norm": 5.892368860593251, "learning_rate": 5.126660886839244e-06, "loss": 17.3287, "step": 27747 }, { "epoch": 0.5072111429981538, "grad_norm": 7.190833850718565, "learning_rate": 5.126364970072843e-06, "loss": 17.5176, "step": 27748 }, { "epoch": 0.5072294221946003, "grad_norm": 7.364418141602431, "learning_rate": 5.126069052863542e-06, "loss": 17.8325, "step": 27749 }, { "epoch": 0.5072477013910468, "grad_norm": 8.195818727530257, "learning_rate": 5.125773135212383e-06, "loss": 18.4358, "step": 27750 }, { "epoch": 0.5072659805874934, "grad_norm": 6.889646480412214, "learning_rate": 5.125477217120399e-06, "loss": 17.9033, "step": 27751 }, { "epoch": 0.5072842597839399, "grad_norm": 5.628395766056375, "learning_rate": 5.125181298588629e-06, "loss": 17.3565, "step": 27752 }, { "epoch": 0.5073025389803865, "grad_norm": 4.957846213430797, "learning_rate": 5.124885379618107e-06, "loss": 16.8323, "step": 27753 }, { "epoch": 0.5073208181768329, "grad_norm": 8.043322179811897, "learning_rate": 5.124589460209875e-06, "loss": 17.9384, "step": 27754 }, { "epoch": 0.5073390973732794, "grad_norm": 5.643776303822953, "learning_rate": 5.124293540364966e-06, "loss": 17.4568, "step": 27755 }, { "epoch": 0.507357376569726, "grad_norm": 6.823357145909125, "learning_rate": 5.12399762008442e-06, "loss": 17.6151, "step": 27756 }, { "epoch": 0.5073756557661725, "grad_norm": 6.848050043748216, "learning_rate": 5.1237016993692726e-06, "loss": 17.5411, "step": 27757 }, { "epoch": 0.5073939349626191, "grad_norm": 6.818873049251554, "learning_rate": 5.123405778220562e-06, "loss": 17.5898, "step": 27758 }, { "epoch": 0.5074122141590656, "grad_norm": 6.891246948640802, "learning_rate": 5.123109856639325e-06, "loss": 17.724, "step": 27759 }, { "epoch": 0.507430493355512, "grad_norm": 5.22646842897986, "learning_rate": 5.122813934626598e-06, "loss": 17.3879, "step": 27760 }, { "epoch": 0.5074487725519586, "grad_norm": 6.241375253225352, "learning_rate": 5.122518012183419e-06, "loss": 17.4592, "step": 27761 }, { "epoch": 0.5074670517484051, "grad_norm": 7.011943657573289, "learning_rate": 5.1222220893108245e-06, "loss": 17.7635, "step": 27762 }, { "epoch": 0.5074853309448517, "grad_norm": 8.340489642847533, "learning_rate": 5.121926166009854e-06, "loss": 17.9439, "step": 27763 }, { "epoch": 0.5075036101412982, "grad_norm": 7.020032119133576, "learning_rate": 5.1216302422815415e-06, "loss": 18.0195, "step": 27764 }, { "epoch": 0.5075218893377447, "grad_norm": 7.319892816728759, "learning_rate": 5.121334318126925e-06, "loss": 17.8734, "step": 27765 }, { "epoch": 0.5075401685341913, "grad_norm": 6.401731890036808, "learning_rate": 5.121038393547043e-06, "loss": 17.5819, "step": 27766 }, { "epoch": 0.5075584477306377, "grad_norm": 4.869051610432103, "learning_rate": 5.120742468542932e-06, "loss": 17.0804, "step": 27767 }, { "epoch": 0.5075767269270843, "grad_norm": 7.001114839435052, "learning_rate": 5.120446543115629e-06, "loss": 17.6885, "step": 27768 }, { "epoch": 0.5075950061235308, "grad_norm": 5.985950871215604, "learning_rate": 5.12015061726617e-06, "loss": 17.2141, "step": 27769 }, { "epoch": 0.5076132853199773, "grad_norm": 6.203530235471378, "learning_rate": 5.119854690995597e-06, "loss": 17.4852, "step": 27770 }, { "epoch": 0.5076315645164239, "grad_norm": 6.345810166297624, "learning_rate": 5.11955876430494e-06, "loss": 17.5605, "step": 27771 }, { "epoch": 0.5076498437128704, "grad_norm": 7.017718859447296, "learning_rate": 5.119262837195241e-06, "loss": 18.0204, "step": 27772 }, { "epoch": 0.507668122909317, "grad_norm": 5.915548862643262, "learning_rate": 5.118966909667536e-06, "loss": 17.4062, "step": 27773 }, { "epoch": 0.5076864021057634, "grad_norm": 8.459241006330435, "learning_rate": 5.118670981722864e-06, "loss": 18.7702, "step": 27774 }, { "epoch": 0.5077046813022099, "grad_norm": 5.26648824627864, "learning_rate": 5.11837505336226e-06, "loss": 17.0453, "step": 27775 }, { "epoch": 0.5077229604986565, "grad_norm": 6.546008264375168, "learning_rate": 5.11807912458676e-06, "loss": 17.4485, "step": 27776 }, { "epoch": 0.507741239695103, "grad_norm": 6.065952069195552, "learning_rate": 5.117783195397405e-06, "loss": 17.2764, "step": 27777 }, { "epoch": 0.5077595188915496, "grad_norm": 6.844432593900314, "learning_rate": 5.117487265795229e-06, "loss": 17.4527, "step": 27778 }, { "epoch": 0.5077777980879961, "grad_norm": 5.670818472573281, "learning_rate": 5.11719133578127e-06, "loss": 17.3438, "step": 27779 }, { "epoch": 0.5077960772844425, "grad_norm": 7.971437219958533, "learning_rate": 5.1168954053565655e-06, "loss": 18.0089, "step": 27780 }, { "epoch": 0.5078143564808891, "grad_norm": 6.268244555676373, "learning_rate": 5.116599474522153e-06, "loss": 17.5396, "step": 27781 }, { "epoch": 0.5078326356773356, "grad_norm": 6.215729356350821, "learning_rate": 5.11630354327907e-06, "loss": 17.5865, "step": 27782 }, { "epoch": 0.5078509148737822, "grad_norm": 6.288194553442732, "learning_rate": 5.116007611628353e-06, "loss": 17.5758, "step": 27783 }, { "epoch": 0.5078691940702287, "grad_norm": 6.14978246789447, "learning_rate": 5.115711679571038e-06, "loss": 17.4051, "step": 27784 }, { "epoch": 0.5078874732666752, "grad_norm": 6.612922377398286, "learning_rate": 5.115415747108166e-06, "loss": 17.4096, "step": 27785 }, { "epoch": 0.5079057524631218, "grad_norm": 6.47498327904835, "learning_rate": 5.11511981424077e-06, "loss": 17.2669, "step": 27786 }, { "epoch": 0.5079240316595682, "grad_norm": 7.595785899509144, "learning_rate": 5.114823880969889e-06, "loss": 17.6638, "step": 27787 }, { "epoch": 0.5079423108560148, "grad_norm": 6.23259983046272, "learning_rate": 5.114527947296563e-06, "loss": 17.4832, "step": 27788 }, { "epoch": 0.5079605900524613, "grad_norm": 7.519498838984317, "learning_rate": 5.114232013221823e-06, "loss": 17.7527, "step": 27789 }, { "epoch": 0.5079788692489078, "grad_norm": 7.11893307367405, "learning_rate": 5.1139360787467104e-06, "loss": 17.8125, "step": 27790 }, { "epoch": 0.5079971484453544, "grad_norm": 7.572962276243854, "learning_rate": 5.113640143872264e-06, "loss": 17.6688, "step": 27791 }, { "epoch": 0.5080154276418009, "grad_norm": 5.910023689579945, "learning_rate": 5.1133442085995165e-06, "loss": 17.5413, "step": 27792 }, { "epoch": 0.5080337068382474, "grad_norm": 6.737547699899249, "learning_rate": 5.113048272929508e-06, "loss": 17.8257, "step": 27793 }, { "epoch": 0.5080519860346939, "grad_norm": 6.569180814096426, "learning_rate": 5.112752336863275e-06, "loss": 17.3771, "step": 27794 }, { "epoch": 0.5080702652311404, "grad_norm": 6.509063902180465, "learning_rate": 5.112456400401855e-06, "loss": 17.4601, "step": 27795 }, { "epoch": 0.508088544427587, "grad_norm": 6.216367487588847, "learning_rate": 5.112160463546285e-06, "loss": 17.2877, "step": 27796 }, { "epoch": 0.5081068236240335, "grad_norm": 5.924333410370937, "learning_rate": 5.111864526297603e-06, "loss": 17.0676, "step": 27797 }, { "epoch": 0.50812510282048, "grad_norm": 6.347197197891492, "learning_rate": 5.111568588656845e-06, "loss": 17.7076, "step": 27798 }, { "epoch": 0.5081433820169265, "grad_norm": 6.201500369875943, "learning_rate": 5.111272650625049e-06, "loss": 17.4921, "step": 27799 }, { "epoch": 0.508161661213373, "grad_norm": 5.864006941659548, "learning_rate": 5.110976712203251e-06, "loss": 17.397, "step": 27800 }, { "epoch": 0.5081799404098196, "grad_norm": 6.527750122750077, "learning_rate": 5.110680773392491e-06, "loss": 17.7126, "step": 27801 }, { "epoch": 0.5081982196062661, "grad_norm": 5.978759562287268, "learning_rate": 5.110384834193804e-06, "loss": 17.5569, "step": 27802 }, { "epoch": 0.5082164988027126, "grad_norm": 5.653941790257011, "learning_rate": 5.110088894608226e-06, "loss": 17.2645, "step": 27803 }, { "epoch": 0.5082347779991592, "grad_norm": 7.111886366714862, "learning_rate": 5.109792954636796e-06, "loss": 17.7092, "step": 27804 }, { "epoch": 0.5082530571956057, "grad_norm": 5.887329362261316, "learning_rate": 5.109497014280555e-06, "loss": 17.5025, "step": 27805 }, { "epoch": 0.5082713363920522, "grad_norm": 6.316006213197096, "learning_rate": 5.1092010735405325e-06, "loss": 17.4514, "step": 27806 }, { "epoch": 0.5082896155884987, "grad_norm": 5.425393787878941, "learning_rate": 5.108905132417772e-06, "loss": 17.1413, "step": 27807 }, { "epoch": 0.5083078947849452, "grad_norm": 6.179089818058358, "learning_rate": 5.1086091909133075e-06, "loss": 17.3279, "step": 27808 }, { "epoch": 0.5083261739813918, "grad_norm": 5.75494897087057, "learning_rate": 5.108313249028177e-06, "loss": 17.2535, "step": 27809 }, { "epoch": 0.5083444531778383, "grad_norm": 7.413869531581902, "learning_rate": 5.108017306763417e-06, "loss": 17.927, "step": 27810 }, { "epoch": 0.5083627323742849, "grad_norm": 6.531126158633522, "learning_rate": 5.107721364120067e-06, "loss": 17.4851, "step": 27811 }, { "epoch": 0.5083810115707313, "grad_norm": 7.495061218472021, "learning_rate": 5.107425421099163e-06, "loss": 18.0959, "step": 27812 }, { "epoch": 0.5083992907671778, "grad_norm": 7.345337631125079, "learning_rate": 5.107129477701743e-06, "loss": 17.8673, "step": 27813 }, { "epoch": 0.5084175699636244, "grad_norm": 5.881337538549764, "learning_rate": 5.106833533928842e-06, "loss": 17.3385, "step": 27814 }, { "epoch": 0.5084358491600709, "grad_norm": 6.806397927155145, "learning_rate": 5.106537589781501e-06, "loss": 17.442, "step": 27815 }, { "epoch": 0.5084541283565175, "grad_norm": 7.2959632017956695, "learning_rate": 5.106241645260754e-06, "loss": 17.8208, "step": 27816 }, { "epoch": 0.508472407552964, "grad_norm": 5.475619954715328, "learning_rate": 5.105945700367636e-06, "loss": 16.8526, "step": 27817 }, { "epoch": 0.5084906867494104, "grad_norm": 8.234228650583185, "learning_rate": 5.105649755103191e-06, "loss": 18.1657, "step": 27818 }, { "epoch": 0.508508965945857, "grad_norm": 8.355068089071333, "learning_rate": 5.1053538094684515e-06, "loss": 17.6617, "step": 27819 }, { "epoch": 0.5085272451423035, "grad_norm": 6.819178272566638, "learning_rate": 5.105057863464458e-06, "loss": 17.7844, "step": 27820 }, { "epoch": 0.5085455243387501, "grad_norm": 6.923034621610501, "learning_rate": 5.104761917092243e-06, "loss": 17.6915, "step": 27821 }, { "epoch": 0.5085638035351966, "grad_norm": 6.931761091764257, "learning_rate": 5.104465970352848e-06, "loss": 17.6312, "step": 27822 }, { "epoch": 0.5085820827316431, "grad_norm": 5.682705060024975, "learning_rate": 5.1041700232473095e-06, "loss": 17.3104, "step": 27823 }, { "epoch": 0.5086003619280897, "grad_norm": 6.657548656800469, "learning_rate": 5.103874075776663e-06, "loss": 17.6736, "step": 27824 }, { "epoch": 0.5086186411245361, "grad_norm": 7.483931887010073, "learning_rate": 5.103578127941946e-06, "loss": 17.8158, "step": 27825 }, { "epoch": 0.5086369203209827, "grad_norm": 6.676413809934157, "learning_rate": 5.103282179744198e-06, "loss": 17.5908, "step": 27826 }, { "epoch": 0.5086551995174292, "grad_norm": 7.886278071909366, "learning_rate": 5.102986231184455e-06, "loss": 17.9176, "step": 27827 }, { "epoch": 0.5086734787138757, "grad_norm": 7.839605698319246, "learning_rate": 5.102690282263754e-06, "loss": 17.8862, "step": 27828 }, { "epoch": 0.5086917579103223, "grad_norm": 4.9164327604959315, "learning_rate": 5.102394332983132e-06, "loss": 17.0036, "step": 27829 }, { "epoch": 0.5087100371067688, "grad_norm": 6.177818813364183, "learning_rate": 5.1020983833436264e-06, "loss": 17.1324, "step": 27830 }, { "epoch": 0.5087283163032154, "grad_norm": 8.576030724323914, "learning_rate": 5.101802433346275e-06, "loss": 17.9203, "step": 27831 }, { "epoch": 0.5087465954996618, "grad_norm": 7.2500642547772225, "learning_rate": 5.101506482992118e-06, "loss": 18.0675, "step": 27832 }, { "epoch": 0.5087648746961083, "grad_norm": 6.52184673889023, "learning_rate": 5.101210532282187e-06, "loss": 17.4957, "step": 27833 }, { "epoch": 0.5087831538925549, "grad_norm": 6.68907586183205, "learning_rate": 5.100914581217521e-06, "loss": 17.5087, "step": 27834 }, { "epoch": 0.5088014330890014, "grad_norm": 6.575152285909569, "learning_rate": 5.1006186297991574e-06, "loss": 17.392, "step": 27835 }, { "epoch": 0.508819712285448, "grad_norm": 11.465419141079181, "learning_rate": 5.100322678028138e-06, "loss": 19.344, "step": 27836 }, { "epoch": 0.5088379914818945, "grad_norm": 6.067945112157284, "learning_rate": 5.100026725905494e-06, "loss": 17.4425, "step": 27837 }, { "epoch": 0.5088562706783409, "grad_norm": 5.802755459897126, "learning_rate": 5.099730773432266e-06, "loss": 17.0646, "step": 27838 }, { "epoch": 0.5088745498747875, "grad_norm": 6.309037106321797, "learning_rate": 5.099434820609488e-06, "loss": 17.2151, "step": 27839 }, { "epoch": 0.508892829071234, "grad_norm": 6.163295659381445, "learning_rate": 5.099138867438201e-06, "loss": 17.281, "step": 27840 }, { "epoch": 0.5089111082676806, "grad_norm": 8.354820996571528, "learning_rate": 5.098842913919442e-06, "loss": 18.0639, "step": 27841 }, { "epoch": 0.5089293874641271, "grad_norm": 7.64688041787203, "learning_rate": 5.098546960054246e-06, "loss": 17.6232, "step": 27842 }, { "epoch": 0.5089476666605736, "grad_norm": 6.7545021547324, "learning_rate": 5.098251005843652e-06, "loss": 17.6928, "step": 27843 }, { "epoch": 0.5089659458570202, "grad_norm": 5.143487498292757, "learning_rate": 5.097955051288696e-06, "loss": 17.1643, "step": 27844 }, { "epoch": 0.5089842250534666, "grad_norm": 5.53427588550187, "learning_rate": 5.097659096390416e-06, "loss": 17.2131, "step": 27845 }, { "epoch": 0.5090025042499132, "grad_norm": 6.30860754506615, "learning_rate": 5.09736314114985e-06, "loss": 17.7132, "step": 27846 }, { "epoch": 0.5090207834463597, "grad_norm": 8.563713722008549, "learning_rate": 5.0970671855680344e-06, "loss": 17.6117, "step": 27847 }, { "epoch": 0.5090390626428062, "grad_norm": 7.1524516519094306, "learning_rate": 5.096771229646007e-06, "loss": 17.6969, "step": 27848 }, { "epoch": 0.5090573418392528, "grad_norm": 5.634445277772277, "learning_rate": 5.0964752733848035e-06, "loss": 17.332, "step": 27849 }, { "epoch": 0.5090756210356993, "grad_norm": 10.846335040821502, "learning_rate": 5.096179316785464e-06, "loss": 18.1933, "step": 27850 }, { "epoch": 0.5090939002321458, "grad_norm": 5.813532846858103, "learning_rate": 5.095883359849024e-06, "loss": 17.3308, "step": 27851 }, { "epoch": 0.5091121794285923, "grad_norm": 6.807114437097919, "learning_rate": 5.095587402576521e-06, "loss": 17.7462, "step": 27852 }, { "epoch": 0.5091304586250388, "grad_norm": 5.423405840084081, "learning_rate": 5.095291444968993e-06, "loss": 17.0539, "step": 27853 }, { "epoch": 0.5091487378214854, "grad_norm": 8.369617524169517, "learning_rate": 5.094995487027475e-06, "loss": 17.9208, "step": 27854 }, { "epoch": 0.5091670170179319, "grad_norm": 5.734025767614059, "learning_rate": 5.094699528753008e-06, "loss": 17.2728, "step": 27855 }, { "epoch": 0.5091852962143785, "grad_norm": 7.37603727772448, "learning_rate": 5.094403570146626e-06, "loss": 17.5804, "step": 27856 }, { "epoch": 0.509203575410825, "grad_norm": 5.523727202934449, "learning_rate": 5.0941076112093694e-06, "loss": 17.057, "step": 27857 }, { "epoch": 0.5092218546072714, "grad_norm": 6.575651794090517, "learning_rate": 5.093811651942272e-06, "loss": 17.824, "step": 27858 }, { "epoch": 0.509240133803718, "grad_norm": 7.08990373527558, "learning_rate": 5.093515692346373e-06, "loss": 17.8624, "step": 27859 }, { "epoch": 0.5092584130001645, "grad_norm": 6.252322738583567, "learning_rate": 5.093219732422712e-06, "loss": 17.2543, "step": 27860 }, { "epoch": 0.5092766921966111, "grad_norm": 7.274000395594801, "learning_rate": 5.092923772172322e-06, "loss": 17.5227, "step": 27861 }, { "epoch": 0.5092949713930576, "grad_norm": 7.363211891902213, "learning_rate": 5.092627811596241e-06, "loss": 17.73, "step": 27862 }, { "epoch": 0.509313250589504, "grad_norm": 7.440880837814436, "learning_rate": 5.0923318506955086e-06, "loss": 17.8423, "step": 27863 }, { "epoch": 0.5093315297859506, "grad_norm": 5.093079881812766, "learning_rate": 5.092035889471163e-06, "loss": 17.0842, "step": 27864 }, { "epoch": 0.5093498089823971, "grad_norm": 6.865865467142303, "learning_rate": 5.091739927924239e-06, "loss": 17.9005, "step": 27865 }, { "epoch": 0.5093680881788436, "grad_norm": 9.030087018564162, "learning_rate": 5.0914439660557726e-06, "loss": 18.2221, "step": 27866 }, { "epoch": 0.5093863673752902, "grad_norm": 6.753652388079484, "learning_rate": 5.0911480038668036e-06, "loss": 17.7345, "step": 27867 }, { "epoch": 0.5094046465717367, "grad_norm": 7.056262798115523, "learning_rate": 5.090852041358369e-06, "loss": 17.5722, "step": 27868 }, { "epoch": 0.5094229257681833, "grad_norm": 6.356815609070242, "learning_rate": 5.0905560785315065e-06, "loss": 17.3165, "step": 27869 }, { "epoch": 0.5094412049646297, "grad_norm": 6.512300669371463, "learning_rate": 5.090260115387253e-06, "loss": 17.2421, "step": 27870 }, { "epoch": 0.5094594841610762, "grad_norm": 8.32693455713865, "learning_rate": 5.089964151926644e-06, "loss": 18.4941, "step": 27871 }, { "epoch": 0.5094777633575228, "grad_norm": 5.997102400857258, "learning_rate": 5.089668188150719e-06, "loss": 17.2529, "step": 27872 }, { "epoch": 0.5094960425539693, "grad_norm": 6.3895848858132105, "learning_rate": 5.089372224060516e-06, "loss": 17.4529, "step": 27873 }, { "epoch": 0.5095143217504159, "grad_norm": 8.13476791743883, "learning_rate": 5.089076259657071e-06, "loss": 18.2045, "step": 27874 }, { "epoch": 0.5095326009468624, "grad_norm": 5.361835074480378, "learning_rate": 5.088780294941421e-06, "loss": 17.0467, "step": 27875 }, { "epoch": 0.5095508801433088, "grad_norm": 6.250313742437329, "learning_rate": 5.088484329914602e-06, "loss": 17.5089, "step": 27876 }, { "epoch": 0.5095691593397554, "grad_norm": 6.288506411961067, "learning_rate": 5.0881883645776565e-06, "loss": 17.7503, "step": 27877 }, { "epoch": 0.5095874385362019, "grad_norm": 6.757143820103346, "learning_rate": 5.087892398931616e-06, "loss": 17.5151, "step": 27878 }, { "epoch": 0.5096057177326485, "grad_norm": 8.195882530412849, "learning_rate": 5.087596432977521e-06, "loss": 17.9789, "step": 27879 }, { "epoch": 0.509623996929095, "grad_norm": 7.861485717426822, "learning_rate": 5.087300466716407e-06, "loss": 17.7657, "step": 27880 }, { "epoch": 0.5096422761255415, "grad_norm": 6.36459832657322, "learning_rate": 5.087004500149314e-06, "loss": 17.5838, "step": 27881 }, { "epoch": 0.5096605553219881, "grad_norm": 5.4643637952944415, "learning_rate": 5.086708533277277e-06, "loss": 17.012, "step": 27882 }, { "epoch": 0.5096788345184345, "grad_norm": 5.439422791490859, "learning_rate": 5.086412566101334e-06, "loss": 17.1715, "step": 27883 }, { "epoch": 0.5096971137148811, "grad_norm": 6.050986215713007, "learning_rate": 5.086116598622522e-06, "loss": 17.3001, "step": 27884 }, { "epoch": 0.5097153929113276, "grad_norm": 7.2932790019798235, "learning_rate": 5.08582063084188e-06, "loss": 17.6906, "step": 27885 }, { "epoch": 0.5097336721077741, "grad_norm": 6.866716266787876, "learning_rate": 5.085524662760444e-06, "loss": 17.6706, "step": 27886 }, { "epoch": 0.5097519513042207, "grad_norm": 6.237292491224625, "learning_rate": 5.085228694379251e-06, "loss": 17.4173, "step": 27887 }, { "epoch": 0.5097702305006672, "grad_norm": 9.178059618595595, "learning_rate": 5.0849327256993394e-06, "loss": 18.0714, "step": 27888 }, { "epoch": 0.5097885096971138, "grad_norm": 6.494638280082455, "learning_rate": 5.084636756721744e-06, "loss": 17.8371, "step": 27889 }, { "epoch": 0.5098067888935602, "grad_norm": 8.03154022239092, "learning_rate": 5.084340787447506e-06, "loss": 18.1014, "step": 27890 }, { "epoch": 0.5098250680900067, "grad_norm": 8.497256806005296, "learning_rate": 5.084044817877663e-06, "loss": 17.6061, "step": 27891 }, { "epoch": 0.5098433472864533, "grad_norm": 6.4522589549367675, "learning_rate": 5.083748848013247e-06, "loss": 17.4162, "step": 27892 }, { "epoch": 0.5098616264828998, "grad_norm": 6.124167891150617, "learning_rate": 5.083452877855298e-06, "loss": 17.3322, "step": 27893 }, { "epoch": 0.5098799056793464, "grad_norm": 6.070776568095202, "learning_rate": 5.083156907404855e-06, "loss": 17.1881, "step": 27894 }, { "epoch": 0.5098981848757929, "grad_norm": 6.869976134582212, "learning_rate": 5.0828609366629556e-06, "loss": 17.7099, "step": 27895 }, { "epoch": 0.5099164640722393, "grad_norm": 6.624390682490424, "learning_rate": 5.082564965630634e-06, "loss": 17.6734, "step": 27896 }, { "epoch": 0.5099347432686859, "grad_norm": 6.250022675612942, "learning_rate": 5.0822689943089294e-06, "loss": 17.5516, "step": 27897 }, { "epoch": 0.5099530224651324, "grad_norm": 6.681294668058565, "learning_rate": 5.0819730226988805e-06, "loss": 17.3986, "step": 27898 }, { "epoch": 0.509971301661579, "grad_norm": 8.381762746719547, "learning_rate": 5.081677050801522e-06, "loss": 18.1535, "step": 27899 }, { "epoch": 0.5099895808580255, "grad_norm": 6.578829196788574, "learning_rate": 5.081381078617893e-06, "loss": 17.7318, "step": 27900 }, { "epoch": 0.510007860054472, "grad_norm": 7.0951003053675334, "learning_rate": 5.0810851061490315e-06, "loss": 17.6282, "step": 27901 }, { "epoch": 0.5100261392509186, "grad_norm": 5.103586841876317, "learning_rate": 5.080789133395973e-06, "loss": 17.1156, "step": 27902 }, { "epoch": 0.510044418447365, "grad_norm": 7.460144193023806, "learning_rate": 5.080493160359754e-06, "loss": 17.6798, "step": 27903 }, { "epoch": 0.5100626976438116, "grad_norm": 7.090248826311705, "learning_rate": 5.080197187041415e-06, "loss": 17.0903, "step": 27904 }, { "epoch": 0.5100809768402581, "grad_norm": 6.8042642548949885, "learning_rate": 5.079901213441992e-06, "loss": 17.5889, "step": 27905 }, { "epoch": 0.5100992560367046, "grad_norm": 6.165145144153961, "learning_rate": 5.079605239562522e-06, "loss": 16.9309, "step": 27906 }, { "epoch": 0.5101175352331512, "grad_norm": 6.504810586479322, "learning_rate": 5.079309265404042e-06, "loss": 17.6423, "step": 27907 }, { "epoch": 0.5101358144295977, "grad_norm": 5.552455567232766, "learning_rate": 5.079013290967589e-06, "loss": 17.2008, "step": 27908 }, { "epoch": 0.5101540936260442, "grad_norm": 7.036312304543531, "learning_rate": 5.078717316254202e-06, "loss": 17.0784, "step": 27909 }, { "epoch": 0.5101723728224907, "grad_norm": 6.5684040882431285, "learning_rate": 5.078421341264919e-06, "loss": 17.5405, "step": 27910 }, { "epoch": 0.5101906520189372, "grad_norm": 6.3598182509281775, "learning_rate": 5.078125366000775e-06, "loss": 17.3858, "step": 27911 }, { "epoch": 0.5102089312153838, "grad_norm": 6.950337943610434, "learning_rate": 5.077829390462809e-06, "loss": 17.6734, "step": 27912 }, { "epoch": 0.5102272104118303, "grad_norm": 6.392436753818627, "learning_rate": 5.077533414652056e-06, "loss": 17.6908, "step": 27913 }, { "epoch": 0.5102454896082769, "grad_norm": 6.37787231520368, "learning_rate": 5.077237438569557e-06, "loss": 17.3778, "step": 27914 }, { "epoch": 0.5102637688047233, "grad_norm": 5.703003146742271, "learning_rate": 5.076941462216347e-06, "loss": 17.2388, "step": 27915 }, { "epoch": 0.5102820480011698, "grad_norm": 5.423843335529613, "learning_rate": 5.076645485593462e-06, "loss": 17.035, "step": 27916 }, { "epoch": 0.5103003271976164, "grad_norm": 8.167982766498403, "learning_rate": 5.076349508701943e-06, "loss": 18.0702, "step": 27917 }, { "epoch": 0.5103186063940629, "grad_norm": 6.480664865267102, "learning_rate": 5.076053531542826e-06, "loss": 17.2405, "step": 27918 }, { "epoch": 0.5103368855905095, "grad_norm": 6.1631597582761195, "learning_rate": 5.075757554117148e-06, "loss": 17.2679, "step": 27919 }, { "epoch": 0.510355164786956, "grad_norm": 7.117681822102847, "learning_rate": 5.075461576425946e-06, "loss": 17.6283, "step": 27920 }, { "epoch": 0.5103734439834025, "grad_norm": 5.114716886469767, "learning_rate": 5.075165598470257e-06, "loss": 16.8313, "step": 27921 }, { "epoch": 0.510391723179849, "grad_norm": 7.345253415797447, "learning_rate": 5.07486962025112e-06, "loss": 17.9692, "step": 27922 }, { "epoch": 0.5104100023762955, "grad_norm": 5.800808293392524, "learning_rate": 5.07457364176957e-06, "loss": 17.2526, "step": 27923 }, { "epoch": 0.5104282815727421, "grad_norm": 6.642269203657916, "learning_rate": 5.0742776630266475e-06, "loss": 17.4973, "step": 27924 }, { "epoch": 0.5104465607691886, "grad_norm": 6.6416064789316875, "learning_rate": 5.073981684023388e-06, "loss": 17.5644, "step": 27925 }, { "epoch": 0.5104648399656351, "grad_norm": 6.927755807980413, "learning_rate": 5.073685704760828e-06, "loss": 17.3729, "step": 27926 }, { "epoch": 0.5104831191620817, "grad_norm": 6.033087824890685, "learning_rate": 5.073389725240006e-06, "loss": 17.3631, "step": 27927 }, { "epoch": 0.5105013983585281, "grad_norm": 5.796741823708957, "learning_rate": 5.073093745461961e-06, "loss": 17.2888, "step": 27928 }, { "epoch": 0.5105196775549747, "grad_norm": 6.489427966014684, "learning_rate": 5.072797765427729e-06, "loss": 17.6568, "step": 27929 }, { "epoch": 0.5105379567514212, "grad_norm": 5.861282253986606, "learning_rate": 5.072501785138345e-06, "loss": 17.0678, "step": 27930 }, { "epoch": 0.5105562359478677, "grad_norm": 6.958764261470338, "learning_rate": 5.07220580459485e-06, "loss": 17.4967, "step": 27931 }, { "epoch": 0.5105745151443143, "grad_norm": 7.584696063777194, "learning_rate": 5.07190982379828e-06, "loss": 17.7249, "step": 27932 }, { "epoch": 0.5105927943407608, "grad_norm": 5.35595648986422, "learning_rate": 5.071613842749672e-06, "loss": 17.1567, "step": 27933 }, { "epoch": 0.5106110735372072, "grad_norm": 6.061092903293324, "learning_rate": 5.071317861450063e-06, "loss": 17.5192, "step": 27934 }, { "epoch": 0.5106293527336538, "grad_norm": 5.365330897197779, "learning_rate": 5.0710218799004906e-06, "loss": 17.0581, "step": 27935 }, { "epoch": 0.5106476319301003, "grad_norm": 5.838573082680509, "learning_rate": 5.070725898101995e-06, "loss": 17.5544, "step": 27936 }, { "epoch": 0.5106659111265469, "grad_norm": 8.596949097713447, "learning_rate": 5.070429916055609e-06, "loss": 17.6689, "step": 27937 }, { "epoch": 0.5106841903229934, "grad_norm": 8.097615772411954, "learning_rate": 5.070133933762373e-06, "loss": 18.4352, "step": 27938 }, { "epoch": 0.5107024695194399, "grad_norm": 6.61105770284266, "learning_rate": 5.069837951223324e-06, "loss": 17.6067, "step": 27939 }, { "epoch": 0.5107207487158865, "grad_norm": 7.06885343616716, "learning_rate": 5.069541968439498e-06, "loss": 17.7823, "step": 27940 }, { "epoch": 0.5107390279123329, "grad_norm": 7.714779778663963, "learning_rate": 5.069245985411935e-06, "loss": 17.3728, "step": 27941 }, { "epoch": 0.5107573071087795, "grad_norm": 5.374890110205253, "learning_rate": 5.06895000214167e-06, "loss": 17.0761, "step": 27942 }, { "epoch": 0.510775586305226, "grad_norm": 6.779089121408302, "learning_rate": 5.068654018629743e-06, "loss": 17.2467, "step": 27943 }, { "epoch": 0.5107938655016725, "grad_norm": 6.6904896821342055, "learning_rate": 5.068358034877187e-06, "loss": 17.8552, "step": 27944 }, { "epoch": 0.5108121446981191, "grad_norm": 7.544460945651414, "learning_rate": 5.068062050885042e-06, "loss": 18.0192, "step": 27945 }, { "epoch": 0.5108304238945656, "grad_norm": 7.3778038152276055, "learning_rate": 5.0677660666543486e-06, "loss": 17.8556, "step": 27946 }, { "epoch": 0.5108487030910122, "grad_norm": 6.147904216098145, "learning_rate": 5.067470082186138e-06, "loss": 17.287, "step": 27947 }, { "epoch": 0.5108669822874586, "grad_norm": 6.036334583470327, "learning_rate": 5.067174097481451e-06, "loss": 17.5303, "step": 27948 }, { "epoch": 0.5108852614839051, "grad_norm": 6.3812375777208175, "learning_rate": 5.0668781125413235e-06, "loss": 17.2747, "step": 27949 }, { "epoch": 0.5109035406803517, "grad_norm": 7.0846058593199945, "learning_rate": 5.066582127366797e-06, "loss": 17.9406, "step": 27950 }, { "epoch": 0.5109218198767982, "grad_norm": 5.995581704388138, "learning_rate": 5.0662861419589045e-06, "loss": 17.5552, "step": 27951 }, { "epoch": 0.5109400990732448, "grad_norm": 8.079681787989854, "learning_rate": 5.065990156318686e-06, "loss": 17.5316, "step": 27952 }, { "epoch": 0.5109583782696913, "grad_norm": 6.394805775513728, "learning_rate": 5.065694170447175e-06, "loss": 17.3691, "step": 27953 }, { "epoch": 0.5109766574661377, "grad_norm": 9.04358904856608, "learning_rate": 5.065398184345413e-06, "loss": 18.2654, "step": 27954 }, { "epoch": 0.5109949366625843, "grad_norm": 5.919043342764798, "learning_rate": 5.0651021980144366e-06, "loss": 17.2662, "step": 27955 }, { "epoch": 0.5110132158590308, "grad_norm": 6.8232770058589205, "learning_rate": 5.064806211455282e-06, "loss": 17.3929, "step": 27956 }, { "epoch": 0.5110314950554774, "grad_norm": 6.3081512598405, "learning_rate": 5.0645102246689885e-06, "loss": 17.3551, "step": 27957 }, { "epoch": 0.5110497742519239, "grad_norm": 7.207245941402502, "learning_rate": 5.064214237656591e-06, "loss": 17.8506, "step": 27958 }, { "epoch": 0.5110680534483704, "grad_norm": 6.225357130711153, "learning_rate": 5.063918250419128e-06, "loss": 17.3574, "step": 27959 }, { "epoch": 0.511086332644817, "grad_norm": 6.2018937939581145, "learning_rate": 5.063622262957638e-06, "loss": 17.3057, "step": 27960 }, { "epoch": 0.5111046118412634, "grad_norm": 6.3001218579894065, "learning_rate": 5.063326275273157e-06, "loss": 17.3587, "step": 27961 }, { "epoch": 0.51112289103771, "grad_norm": 6.048242051160997, "learning_rate": 5.063030287366723e-06, "loss": 17.4052, "step": 27962 }, { "epoch": 0.5111411702341565, "grad_norm": 6.06499117173175, "learning_rate": 5.062734299239372e-06, "loss": 17.3983, "step": 27963 }, { "epoch": 0.511159449430603, "grad_norm": 5.785315620005533, "learning_rate": 5.0624383108921445e-06, "loss": 17.4715, "step": 27964 }, { "epoch": 0.5111777286270496, "grad_norm": 6.231404459331465, "learning_rate": 5.0621423223260765e-06, "loss": 17.6666, "step": 27965 }, { "epoch": 0.5111960078234961, "grad_norm": 6.198098887849262, "learning_rate": 5.061846333542204e-06, "loss": 17.3928, "step": 27966 }, { "epoch": 0.5112142870199426, "grad_norm": 6.256331047675148, "learning_rate": 5.061550344541566e-06, "loss": 17.4104, "step": 27967 }, { "epoch": 0.5112325662163891, "grad_norm": 6.070585188329285, "learning_rate": 5.061254355325198e-06, "loss": 17.4089, "step": 27968 }, { "epoch": 0.5112508454128356, "grad_norm": 5.648920168004417, "learning_rate": 5.06095836589414e-06, "loss": 17.2043, "step": 27969 }, { "epoch": 0.5112691246092822, "grad_norm": 7.33495141010944, "learning_rate": 5.060662376249429e-06, "loss": 17.926, "step": 27970 }, { "epoch": 0.5112874038057287, "grad_norm": 6.145325637030012, "learning_rate": 5.0603663863921e-06, "loss": 17.5622, "step": 27971 }, { "epoch": 0.5113056830021753, "grad_norm": 7.409558314805561, "learning_rate": 5.060070396323192e-06, "loss": 17.6996, "step": 27972 }, { "epoch": 0.5113239621986218, "grad_norm": 6.537365255117251, "learning_rate": 5.059774406043744e-06, "loss": 17.6237, "step": 27973 }, { "epoch": 0.5113422413950682, "grad_norm": 7.200750731269138, "learning_rate": 5.059478415554792e-06, "loss": 17.9639, "step": 27974 }, { "epoch": 0.5113605205915148, "grad_norm": 6.421165575310187, "learning_rate": 5.059182424857371e-06, "loss": 17.411, "step": 27975 }, { "epoch": 0.5113787997879613, "grad_norm": 5.277040088558584, "learning_rate": 5.058886433952521e-06, "loss": 17.0703, "step": 27976 }, { "epoch": 0.5113970789844079, "grad_norm": 6.936336264939123, "learning_rate": 5.0585904428412824e-06, "loss": 17.7754, "step": 27977 }, { "epoch": 0.5114153581808544, "grad_norm": 6.855191478628995, "learning_rate": 5.058294451524687e-06, "loss": 17.6812, "step": 27978 }, { "epoch": 0.5114336373773009, "grad_norm": 6.050774172441768, "learning_rate": 5.057998460003775e-06, "loss": 17.4065, "step": 27979 }, { "epoch": 0.5114519165737474, "grad_norm": 6.863536531195133, "learning_rate": 5.057702468279583e-06, "loss": 17.816, "step": 27980 }, { "epoch": 0.5114701957701939, "grad_norm": 5.223811086374738, "learning_rate": 5.0574064763531495e-06, "loss": 16.9572, "step": 27981 }, { "epoch": 0.5114884749666405, "grad_norm": 6.796207410027425, "learning_rate": 5.05711048422551e-06, "loss": 17.7347, "step": 27982 }, { "epoch": 0.511506754163087, "grad_norm": 6.777248411355415, "learning_rate": 5.056814491897705e-06, "loss": 17.7146, "step": 27983 }, { "epoch": 0.5115250333595335, "grad_norm": 6.3883882989031235, "learning_rate": 5.056518499370768e-06, "loss": 17.5044, "step": 27984 }, { "epoch": 0.5115433125559801, "grad_norm": 4.973791532447568, "learning_rate": 5.056222506645741e-06, "loss": 17.2173, "step": 27985 }, { "epoch": 0.5115615917524265, "grad_norm": 7.69910450381834, "learning_rate": 5.055926513723657e-06, "loss": 18.153, "step": 27986 }, { "epoch": 0.5115798709488731, "grad_norm": 6.213555465878709, "learning_rate": 5.055630520605557e-06, "loss": 17.3883, "step": 27987 }, { "epoch": 0.5115981501453196, "grad_norm": 6.102399203148746, "learning_rate": 5.055334527292477e-06, "loss": 17.4289, "step": 27988 }, { "epoch": 0.5116164293417661, "grad_norm": 5.704482747869875, "learning_rate": 5.055038533785451e-06, "loss": 17.3771, "step": 27989 }, { "epoch": 0.5116347085382127, "grad_norm": 7.720373131835971, "learning_rate": 5.054742540085523e-06, "loss": 17.9982, "step": 27990 }, { "epoch": 0.5116529877346592, "grad_norm": 5.988599421806966, "learning_rate": 5.054446546193728e-06, "loss": 17.4281, "step": 27991 }, { "epoch": 0.5116712669311058, "grad_norm": 6.250533702797249, "learning_rate": 5.0541505521111e-06, "loss": 17.2882, "step": 27992 }, { "epoch": 0.5116895461275522, "grad_norm": 5.697124985304141, "learning_rate": 5.05385455783868e-06, "loss": 17.2617, "step": 27993 }, { "epoch": 0.5117078253239987, "grad_norm": 8.295971595841486, "learning_rate": 5.053558563377503e-06, "loss": 18.3817, "step": 27994 }, { "epoch": 0.5117261045204453, "grad_norm": 7.837309151991971, "learning_rate": 5.053262568728609e-06, "loss": 17.877, "step": 27995 }, { "epoch": 0.5117443837168918, "grad_norm": 6.498182727342684, "learning_rate": 5.052966573893036e-06, "loss": 17.6076, "step": 27996 }, { "epoch": 0.5117626629133384, "grad_norm": 7.2434474881906, "learning_rate": 5.052670578871818e-06, "loss": 17.802, "step": 27997 }, { "epoch": 0.5117809421097849, "grad_norm": 5.303841371811925, "learning_rate": 5.052374583665994e-06, "loss": 17.1082, "step": 27998 }, { "epoch": 0.5117992213062313, "grad_norm": 5.851803379670491, "learning_rate": 5.052078588276602e-06, "loss": 17.4313, "step": 27999 }, { "epoch": 0.5118175005026779, "grad_norm": 8.61954880884424, "learning_rate": 5.051782592704679e-06, "loss": 18.0882, "step": 28000 }, { "epoch": 0.5118357796991244, "grad_norm": 6.047523950834138, "learning_rate": 5.051486596951264e-06, "loss": 17.4648, "step": 28001 }, { "epoch": 0.5118540588955709, "grad_norm": 6.244999563783573, "learning_rate": 5.051190601017391e-06, "loss": 17.2835, "step": 28002 }, { "epoch": 0.5118723380920175, "grad_norm": 9.220879058723916, "learning_rate": 5.0508946049041e-06, "loss": 18.5216, "step": 28003 }, { "epoch": 0.511890617288464, "grad_norm": 5.911686970152043, "learning_rate": 5.050598608612427e-06, "loss": 17.3586, "step": 28004 }, { "epoch": 0.5119088964849106, "grad_norm": 5.395181574700731, "learning_rate": 5.0503026121434115e-06, "loss": 17.2908, "step": 28005 }, { "epoch": 0.511927175681357, "grad_norm": 7.199897142317298, "learning_rate": 5.05000661549809e-06, "loss": 17.8668, "step": 28006 }, { "epoch": 0.5119454548778035, "grad_norm": 5.713902850634102, "learning_rate": 5.049710618677499e-06, "loss": 17.3038, "step": 28007 }, { "epoch": 0.5119637340742501, "grad_norm": 5.945746328317656, "learning_rate": 5.049414621682677e-06, "loss": 17.2434, "step": 28008 }, { "epoch": 0.5119820132706966, "grad_norm": 6.800970587928478, "learning_rate": 5.049118624514659e-06, "loss": 17.7604, "step": 28009 }, { "epoch": 0.5120002924671432, "grad_norm": 5.014364988281461, "learning_rate": 5.048822627174487e-06, "loss": 17.2556, "step": 28010 }, { "epoch": 0.5120185716635897, "grad_norm": 5.646783329337935, "learning_rate": 5.048526629663194e-06, "loss": 17.1364, "step": 28011 }, { "epoch": 0.5120368508600361, "grad_norm": 5.238593346776299, "learning_rate": 5.0482306319818205e-06, "loss": 17.0546, "step": 28012 }, { "epoch": 0.5120551300564827, "grad_norm": 5.867916078013198, "learning_rate": 5.047934634131403e-06, "loss": 17.257, "step": 28013 }, { "epoch": 0.5120734092529292, "grad_norm": 6.349823261825295, "learning_rate": 5.047638636112978e-06, "loss": 17.6975, "step": 28014 }, { "epoch": 0.5120916884493758, "grad_norm": 8.712592750857338, "learning_rate": 5.047342637927586e-06, "loss": 18.8455, "step": 28015 }, { "epoch": 0.5121099676458223, "grad_norm": 6.66391801475898, "learning_rate": 5.047046639576258e-06, "loss": 17.4625, "step": 28016 }, { "epoch": 0.5121282468422688, "grad_norm": 6.62869295930528, "learning_rate": 5.046750641060038e-06, "loss": 17.7247, "step": 28017 }, { "epoch": 0.5121465260387154, "grad_norm": 5.373166233411081, "learning_rate": 5.046454642379962e-06, "loss": 17.1545, "step": 28018 }, { "epoch": 0.5121648052351618, "grad_norm": 6.501967979661488, "learning_rate": 5.0461586435370656e-06, "loss": 17.5092, "step": 28019 }, { "epoch": 0.5121830844316084, "grad_norm": 6.33903576693256, "learning_rate": 5.045862644532388e-06, "loss": 17.8611, "step": 28020 }, { "epoch": 0.5122013636280549, "grad_norm": 5.994603276352138, "learning_rate": 5.045566645366963e-06, "loss": 17.3117, "step": 28021 }, { "epoch": 0.5122196428245014, "grad_norm": 5.630383901243195, "learning_rate": 5.045270646041834e-06, "loss": 17.291, "step": 28022 }, { "epoch": 0.512237922020948, "grad_norm": 5.532876931544379, "learning_rate": 5.044974646558034e-06, "loss": 17.0049, "step": 28023 }, { "epoch": 0.5122562012173945, "grad_norm": 5.008223902516464, "learning_rate": 5.044678646916602e-06, "loss": 17.0178, "step": 28024 }, { "epoch": 0.512274480413841, "grad_norm": 6.615996521253628, "learning_rate": 5.044382647118574e-06, "loss": 17.6848, "step": 28025 }, { "epoch": 0.5122927596102875, "grad_norm": 6.074262935974503, "learning_rate": 5.044086647164991e-06, "loss": 17.2916, "step": 28026 }, { "epoch": 0.512311038806734, "grad_norm": 5.6487417065577405, "learning_rate": 5.043790647056886e-06, "loss": 17.1785, "step": 28027 }, { "epoch": 0.5123293180031806, "grad_norm": 6.049154975666557, "learning_rate": 5.043494646795299e-06, "loss": 17.1928, "step": 28028 }, { "epoch": 0.5123475971996271, "grad_norm": 5.1185673294355, "learning_rate": 5.043198646381269e-06, "loss": 16.8799, "step": 28029 }, { "epoch": 0.5123658763960737, "grad_norm": 5.770846082516518, "learning_rate": 5.042902645815829e-06, "loss": 17.1606, "step": 28030 }, { "epoch": 0.5123841555925202, "grad_norm": 6.910684220598062, "learning_rate": 5.04260664510002e-06, "loss": 17.4591, "step": 28031 }, { "epoch": 0.5124024347889666, "grad_norm": 6.628891492238753, "learning_rate": 5.042310644234878e-06, "loss": 17.422, "step": 28032 }, { "epoch": 0.5124207139854132, "grad_norm": 5.3739276066214705, "learning_rate": 5.042014643221442e-06, "loss": 17.0926, "step": 28033 }, { "epoch": 0.5124389931818597, "grad_norm": 4.709496993209792, "learning_rate": 5.0417186420607475e-06, "loss": 16.9556, "step": 28034 }, { "epoch": 0.5124572723783063, "grad_norm": 6.570254315115292, "learning_rate": 5.041422640753831e-06, "loss": 17.3542, "step": 28035 }, { "epoch": 0.5124755515747528, "grad_norm": 5.702979363851849, "learning_rate": 5.041126639301736e-06, "loss": 17.0444, "step": 28036 }, { "epoch": 0.5124938307711993, "grad_norm": 6.134215999578505, "learning_rate": 5.040830637705493e-06, "loss": 17.3444, "step": 28037 }, { "epoch": 0.5125121099676458, "grad_norm": 5.53583089154324, "learning_rate": 5.040534635966141e-06, "loss": 17.1026, "step": 28038 }, { "epoch": 0.5125303891640923, "grad_norm": 6.692634199078798, "learning_rate": 5.040238634084721e-06, "loss": 17.1582, "step": 28039 }, { "epoch": 0.5125486683605389, "grad_norm": 9.112706744573707, "learning_rate": 5.0399426320622665e-06, "loss": 17.7881, "step": 28040 }, { "epoch": 0.5125669475569854, "grad_norm": 5.436373246965209, "learning_rate": 5.039646629899817e-06, "loss": 17.1567, "step": 28041 }, { "epoch": 0.5125852267534319, "grad_norm": 6.351125943732603, "learning_rate": 5.0393506275984094e-06, "loss": 17.621, "step": 28042 }, { "epoch": 0.5126035059498785, "grad_norm": 5.806286890615243, "learning_rate": 5.039054625159081e-06, "loss": 17.3372, "step": 28043 }, { "epoch": 0.512621785146325, "grad_norm": 5.160793728004603, "learning_rate": 5.03875862258287e-06, "loss": 16.8742, "step": 28044 }, { "epoch": 0.5126400643427715, "grad_norm": 6.267331369127202, "learning_rate": 5.038462619870814e-06, "loss": 17.3996, "step": 28045 }, { "epoch": 0.512658343539218, "grad_norm": 6.085634046898352, "learning_rate": 5.03816661702395e-06, "loss": 17.5692, "step": 28046 }, { "epoch": 0.5126766227356645, "grad_norm": 6.61253265012242, "learning_rate": 5.037870614043314e-06, "loss": 17.3804, "step": 28047 }, { "epoch": 0.5126949019321111, "grad_norm": 4.96611423998561, "learning_rate": 5.037574610929945e-06, "loss": 16.8542, "step": 28048 }, { "epoch": 0.5127131811285576, "grad_norm": 6.320321578904927, "learning_rate": 5.03727860768488e-06, "loss": 17.5675, "step": 28049 }, { "epoch": 0.5127314603250042, "grad_norm": 7.157330893384386, "learning_rate": 5.036982604309159e-06, "loss": 17.466, "step": 28050 }, { "epoch": 0.5127497395214506, "grad_norm": 5.3084310590091635, "learning_rate": 5.036686600803815e-06, "loss": 17.1899, "step": 28051 }, { "epoch": 0.5127680187178971, "grad_norm": 7.373330017990298, "learning_rate": 5.036390597169888e-06, "loss": 17.9253, "step": 28052 }, { "epoch": 0.5127862979143437, "grad_norm": 6.65378399298599, "learning_rate": 5.036094593408415e-06, "loss": 17.6431, "step": 28053 }, { "epoch": 0.5128045771107902, "grad_norm": 5.858154925957364, "learning_rate": 5.035798589520434e-06, "loss": 17.3389, "step": 28054 }, { "epoch": 0.5128228563072368, "grad_norm": 5.330596774222364, "learning_rate": 5.035502585506981e-06, "loss": 17.1273, "step": 28055 }, { "epoch": 0.5128411355036833, "grad_norm": 6.735683133722009, "learning_rate": 5.035206581369097e-06, "loss": 17.3526, "step": 28056 }, { "epoch": 0.5128594147001297, "grad_norm": 5.310218738370323, "learning_rate": 5.034910577107814e-06, "loss": 17.2729, "step": 28057 }, { "epoch": 0.5128776938965763, "grad_norm": 5.661281842404663, "learning_rate": 5.034614572724175e-06, "loss": 17.3244, "step": 28058 }, { "epoch": 0.5128959730930228, "grad_norm": 5.7079937216885845, "learning_rate": 5.034318568219213e-06, "loss": 17.4455, "step": 28059 }, { "epoch": 0.5129142522894694, "grad_norm": 5.33893828694486, "learning_rate": 5.0340225635939685e-06, "loss": 17.1936, "step": 28060 }, { "epoch": 0.5129325314859159, "grad_norm": 6.991607716437704, "learning_rate": 5.033726558849479e-06, "loss": 17.6457, "step": 28061 }, { "epoch": 0.5129508106823624, "grad_norm": 7.173764685392336, "learning_rate": 5.0334305539867774e-06, "loss": 18.1706, "step": 28062 }, { "epoch": 0.512969089878809, "grad_norm": 7.119662542488564, "learning_rate": 5.033134549006906e-06, "loss": 17.7013, "step": 28063 }, { "epoch": 0.5129873690752554, "grad_norm": 7.890981854441498, "learning_rate": 5.032838543910903e-06, "loss": 18.2298, "step": 28064 }, { "epoch": 0.513005648271702, "grad_norm": 6.941025290307531, "learning_rate": 5.032542538699803e-06, "loss": 17.8069, "step": 28065 }, { "epoch": 0.5130239274681485, "grad_norm": 7.707036799576286, "learning_rate": 5.032246533374643e-06, "loss": 17.4969, "step": 28066 }, { "epoch": 0.513042206664595, "grad_norm": 5.8355286680754626, "learning_rate": 5.031950527936462e-06, "loss": 17.2879, "step": 28067 }, { "epoch": 0.5130604858610416, "grad_norm": 6.097625669021527, "learning_rate": 5.031654522386297e-06, "loss": 17.4816, "step": 28068 }, { "epoch": 0.5130787650574881, "grad_norm": 6.391443302246504, "learning_rate": 5.031358516725185e-06, "loss": 17.5693, "step": 28069 }, { "epoch": 0.5130970442539345, "grad_norm": 5.853587011187509, "learning_rate": 5.031062510954166e-06, "loss": 17.2966, "step": 28070 }, { "epoch": 0.5131153234503811, "grad_norm": 6.888218201777283, "learning_rate": 5.030766505074275e-06, "loss": 17.6219, "step": 28071 }, { "epoch": 0.5131336026468276, "grad_norm": 7.078410786984801, "learning_rate": 5.030470499086549e-06, "loss": 17.7244, "step": 28072 }, { "epoch": 0.5131518818432742, "grad_norm": 5.892402288292222, "learning_rate": 5.030174492992027e-06, "loss": 17.2036, "step": 28073 }, { "epoch": 0.5131701610397207, "grad_norm": 6.045014744014676, "learning_rate": 5.029878486791748e-06, "loss": 17.318, "step": 28074 }, { "epoch": 0.5131884402361672, "grad_norm": 5.76539178992317, "learning_rate": 5.029582480486744e-06, "loss": 17.2788, "step": 28075 }, { "epoch": 0.5132067194326138, "grad_norm": 6.317937925272435, "learning_rate": 5.029286474078058e-06, "loss": 17.3094, "step": 28076 }, { "epoch": 0.5132249986290602, "grad_norm": 5.9402884217697665, "learning_rate": 5.028990467566727e-06, "loss": 17.1369, "step": 28077 }, { "epoch": 0.5132432778255068, "grad_norm": 6.658072244458772, "learning_rate": 5.028694460953785e-06, "loss": 17.6295, "step": 28078 }, { "epoch": 0.5132615570219533, "grad_norm": 6.548053989903735, "learning_rate": 5.028398454240271e-06, "loss": 17.6661, "step": 28079 }, { "epoch": 0.5132798362183998, "grad_norm": 7.370262231792316, "learning_rate": 5.0281024474272225e-06, "loss": 18.1196, "step": 28080 }, { "epoch": 0.5132981154148464, "grad_norm": 5.794905417242664, "learning_rate": 5.027806440515679e-06, "loss": 17.0295, "step": 28081 }, { "epoch": 0.5133163946112929, "grad_norm": 6.559785798742597, "learning_rate": 5.027510433506676e-06, "loss": 17.4465, "step": 28082 }, { "epoch": 0.5133346738077394, "grad_norm": 6.161346157356899, "learning_rate": 5.02721442640125e-06, "loss": 17.6543, "step": 28083 }, { "epoch": 0.5133529530041859, "grad_norm": 4.745825669070633, "learning_rate": 5.026918419200442e-06, "loss": 16.9587, "step": 28084 }, { "epoch": 0.5133712322006324, "grad_norm": 7.6563757111778985, "learning_rate": 5.0266224119052855e-06, "loss": 18.0366, "step": 28085 }, { "epoch": 0.513389511397079, "grad_norm": 7.5577163342264315, "learning_rate": 5.026326404516821e-06, "loss": 18.04, "step": 28086 }, { "epoch": 0.5134077905935255, "grad_norm": 5.61846200718234, "learning_rate": 5.0260303970360835e-06, "loss": 17.259, "step": 28087 }, { "epoch": 0.5134260697899721, "grad_norm": 7.114267771998251, "learning_rate": 5.025734389464113e-06, "loss": 17.5694, "step": 28088 }, { "epoch": 0.5134443489864186, "grad_norm": 5.122244468492323, "learning_rate": 5.025438381801943e-06, "loss": 16.9749, "step": 28089 }, { "epoch": 0.513462628182865, "grad_norm": 8.060971334515285, "learning_rate": 5.0251423740506155e-06, "loss": 18.7725, "step": 28090 }, { "epoch": 0.5134809073793116, "grad_norm": 6.072602028199104, "learning_rate": 5.024846366211168e-06, "loss": 17.5009, "step": 28091 }, { "epoch": 0.5134991865757581, "grad_norm": 7.245636367424293, "learning_rate": 5.024550358284634e-06, "loss": 17.753, "step": 28092 }, { "epoch": 0.5135174657722047, "grad_norm": 6.0389132423800636, "learning_rate": 5.024254350272054e-06, "loss": 17.379, "step": 28093 }, { "epoch": 0.5135357449686512, "grad_norm": 5.406577581521393, "learning_rate": 5.023958342174463e-06, "loss": 17.1855, "step": 28094 }, { "epoch": 0.5135540241650977, "grad_norm": 5.828234473307503, "learning_rate": 5.0236623339929025e-06, "loss": 17.0713, "step": 28095 }, { "epoch": 0.5135723033615442, "grad_norm": 4.6051927541259134, "learning_rate": 5.023366325728406e-06, "loss": 16.7456, "step": 28096 }, { "epoch": 0.5135905825579907, "grad_norm": 6.234438137695905, "learning_rate": 5.023070317382013e-06, "loss": 17.4698, "step": 28097 }, { "epoch": 0.5136088617544373, "grad_norm": 6.745582056480888, "learning_rate": 5.0227743089547594e-06, "loss": 17.3086, "step": 28098 }, { "epoch": 0.5136271409508838, "grad_norm": 5.385427269965797, "learning_rate": 5.022478300447685e-06, "loss": 17.2957, "step": 28099 }, { "epoch": 0.5136454201473303, "grad_norm": 8.091292903650018, "learning_rate": 5.022182291861826e-06, "loss": 17.9107, "step": 28100 }, { "epoch": 0.5136636993437769, "grad_norm": 6.26059130775774, "learning_rate": 5.021886283198221e-06, "loss": 17.4496, "step": 28101 }, { "epoch": 0.5136819785402233, "grad_norm": 6.137892618674586, "learning_rate": 5.021590274457906e-06, "loss": 17.6514, "step": 28102 }, { "epoch": 0.5137002577366699, "grad_norm": 7.384613148894318, "learning_rate": 5.0212942656419175e-06, "loss": 17.8735, "step": 28103 }, { "epoch": 0.5137185369331164, "grad_norm": 6.224373819966781, "learning_rate": 5.020998256751295e-06, "loss": 17.6275, "step": 28104 }, { "epoch": 0.5137368161295629, "grad_norm": 6.561541920810067, "learning_rate": 5.020702247787076e-06, "loss": 17.4027, "step": 28105 }, { "epoch": 0.5137550953260095, "grad_norm": 6.348724462220238, "learning_rate": 5.020406238750297e-06, "loss": 17.2009, "step": 28106 }, { "epoch": 0.513773374522456, "grad_norm": 5.6274317742546, "learning_rate": 5.020110229641997e-06, "loss": 17.1539, "step": 28107 }, { "epoch": 0.5137916537189026, "grad_norm": 5.402416735629333, "learning_rate": 5.01981422046321e-06, "loss": 16.9529, "step": 28108 }, { "epoch": 0.513809932915349, "grad_norm": 7.337176309987599, "learning_rate": 5.019518211214978e-06, "loss": 17.723, "step": 28109 }, { "epoch": 0.5138282121117955, "grad_norm": 6.865097560876604, "learning_rate": 5.019222201898336e-06, "loss": 17.5339, "step": 28110 }, { "epoch": 0.5138464913082421, "grad_norm": 6.23455616923348, "learning_rate": 5.0189261925143214e-06, "loss": 17.2609, "step": 28111 }, { "epoch": 0.5138647705046886, "grad_norm": 6.809861678289743, "learning_rate": 5.018630183063972e-06, "loss": 17.4722, "step": 28112 }, { "epoch": 0.5138830497011352, "grad_norm": 6.1133301907662245, "learning_rate": 5.018334173548326e-06, "loss": 17.4801, "step": 28113 }, { "epoch": 0.5139013288975817, "grad_norm": 11.552232517083285, "learning_rate": 5.018038163968419e-06, "loss": 18.6332, "step": 28114 }, { "epoch": 0.5139196080940281, "grad_norm": 6.335392764099568, "learning_rate": 5.0177421543252925e-06, "loss": 17.5472, "step": 28115 }, { "epoch": 0.5139378872904747, "grad_norm": 5.9829507015151036, "learning_rate": 5.017446144619978e-06, "loss": 17.6113, "step": 28116 }, { "epoch": 0.5139561664869212, "grad_norm": 5.410154458145175, "learning_rate": 5.017150134853518e-06, "loss": 17.0227, "step": 28117 }, { "epoch": 0.5139744456833678, "grad_norm": 5.83075024866596, "learning_rate": 5.016854125026949e-06, "loss": 17.3264, "step": 28118 }, { "epoch": 0.5139927248798143, "grad_norm": 5.460150176460602, "learning_rate": 5.016558115141308e-06, "loss": 17.1501, "step": 28119 }, { "epoch": 0.5140110040762608, "grad_norm": 7.476150179442159, "learning_rate": 5.01626210519763e-06, "loss": 18.2651, "step": 28120 }, { "epoch": 0.5140292832727074, "grad_norm": 8.293272410674218, "learning_rate": 5.015966095196956e-06, "loss": 18.1733, "step": 28121 }, { "epoch": 0.5140475624691538, "grad_norm": 8.932934328206157, "learning_rate": 5.015670085140323e-06, "loss": 18.0572, "step": 28122 }, { "epoch": 0.5140658416656004, "grad_norm": 6.376393939232121, "learning_rate": 5.0153740750287665e-06, "loss": 17.6159, "step": 28123 }, { "epoch": 0.5140841208620469, "grad_norm": 4.975699133719373, "learning_rate": 5.015078064863325e-06, "loss": 16.8964, "step": 28124 }, { "epoch": 0.5141024000584934, "grad_norm": 5.524051630978833, "learning_rate": 5.014782054645037e-06, "loss": 17.229, "step": 28125 }, { "epoch": 0.51412067925494, "grad_norm": 6.733322352927828, "learning_rate": 5.014486044374939e-06, "loss": 17.7211, "step": 28126 }, { "epoch": 0.5141389584513865, "grad_norm": 6.145482932736864, "learning_rate": 5.014190034054068e-06, "loss": 17.4748, "step": 28127 }, { "epoch": 0.514157237647833, "grad_norm": 10.969824636140316, "learning_rate": 5.013894023683463e-06, "loss": 18.5218, "step": 28128 }, { "epoch": 0.5141755168442795, "grad_norm": 6.207129862453142, "learning_rate": 5.013598013264162e-06, "loss": 17.3163, "step": 28129 }, { "epoch": 0.514193796040726, "grad_norm": 8.738833173940085, "learning_rate": 5.013302002797198e-06, "loss": 18.1539, "step": 28130 }, { "epoch": 0.5142120752371726, "grad_norm": 6.69535504124619, "learning_rate": 5.013005992283613e-06, "loss": 17.2844, "step": 28131 }, { "epoch": 0.5142303544336191, "grad_norm": 6.870859011575733, "learning_rate": 5.012709981724443e-06, "loss": 17.4839, "step": 28132 }, { "epoch": 0.5142486336300657, "grad_norm": 5.880878576065578, "learning_rate": 5.012413971120726e-06, "loss": 17.4608, "step": 28133 }, { "epoch": 0.5142669128265122, "grad_norm": 7.412049525932334, "learning_rate": 5.0121179604735005e-06, "loss": 17.7907, "step": 28134 }, { "epoch": 0.5142851920229586, "grad_norm": 5.182481876257259, "learning_rate": 5.011821949783799e-06, "loss": 17.1516, "step": 28135 }, { "epoch": 0.5143034712194052, "grad_norm": 7.256991660617841, "learning_rate": 5.011525939052666e-06, "loss": 17.9211, "step": 28136 }, { "epoch": 0.5143217504158517, "grad_norm": 6.612943637546306, "learning_rate": 5.011229928281134e-06, "loss": 17.7423, "step": 28137 }, { "epoch": 0.5143400296122982, "grad_norm": 6.236537944236401, "learning_rate": 5.010933917470243e-06, "loss": 17.4842, "step": 28138 }, { "epoch": 0.5143583088087448, "grad_norm": 6.912204897544808, "learning_rate": 5.0106379066210285e-06, "loss": 17.5455, "step": 28139 }, { "epoch": 0.5143765880051913, "grad_norm": 5.305372202814259, "learning_rate": 5.010341895734529e-06, "loss": 17.1269, "step": 28140 }, { "epoch": 0.5143948672016379, "grad_norm": 5.715634971383142, "learning_rate": 5.010045884811783e-06, "loss": 17.015, "step": 28141 }, { "epoch": 0.5144131463980843, "grad_norm": 8.049630120347942, "learning_rate": 5.009749873853827e-06, "loss": 18.0077, "step": 28142 }, { "epoch": 0.5144314255945308, "grad_norm": 5.197981675803811, "learning_rate": 5.009453862861697e-06, "loss": 16.8769, "step": 28143 }, { "epoch": 0.5144497047909774, "grad_norm": 6.366521019383396, "learning_rate": 5.009157851836434e-06, "loss": 17.6728, "step": 28144 }, { "epoch": 0.5144679839874239, "grad_norm": 4.922210672212978, "learning_rate": 5.008861840779072e-06, "loss": 16.7652, "step": 28145 }, { "epoch": 0.5144862631838705, "grad_norm": 6.174499497399777, "learning_rate": 5.008565829690652e-06, "loss": 17.2349, "step": 28146 }, { "epoch": 0.514504542380317, "grad_norm": 6.432555622617168, "learning_rate": 5.0082698185722076e-06, "loss": 17.6648, "step": 28147 }, { "epoch": 0.5145228215767634, "grad_norm": 6.826354523350087, "learning_rate": 5.007973807424778e-06, "loss": 17.4025, "step": 28148 }, { "epoch": 0.51454110077321, "grad_norm": 5.885592308033598, "learning_rate": 5.007677796249402e-06, "loss": 17.3885, "step": 28149 }, { "epoch": 0.5145593799696565, "grad_norm": 5.243712760308505, "learning_rate": 5.007381785047116e-06, "loss": 17.0312, "step": 28150 }, { "epoch": 0.5145776591661031, "grad_norm": 6.797253314513713, "learning_rate": 5.007085773818958e-06, "loss": 17.6553, "step": 28151 }, { "epoch": 0.5145959383625496, "grad_norm": 5.585097237420028, "learning_rate": 5.006789762565964e-06, "loss": 17.2602, "step": 28152 }, { "epoch": 0.5146142175589961, "grad_norm": 6.464299392483597, "learning_rate": 5.006493751289172e-06, "loss": 17.3206, "step": 28153 }, { "epoch": 0.5146324967554426, "grad_norm": 5.9594289792473, "learning_rate": 5.006197739989621e-06, "loss": 17.4366, "step": 28154 }, { "epoch": 0.5146507759518891, "grad_norm": 5.19228241604465, "learning_rate": 5.005901728668346e-06, "loss": 16.9556, "step": 28155 }, { "epoch": 0.5146690551483357, "grad_norm": 5.6669730647734475, "learning_rate": 5.005605717326388e-06, "loss": 17.3045, "step": 28156 }, { "epoch": 0.5146873343447822, "grad_norm": 4.970170824432851, "learning_rate": 5.005309705964782e-06, "loss": 16.9029, "step": 28157 }, { "epoch": 0.5147056135412287, "grad_norm": 6.8288527551437115, "learning_rate": 5.005013694584565e-06, "loss": 17.6727, "step": 28158 }, { "epoch": 0.5147238927376753, "grad_norm": 6.65212379126175, "learning_rate": 5.004717683186775e-06, "loss": 17.6678, "step": 28159 }, { "epoch": 0.5147421719341218, "grad_norm": 6.49324925634759, "learning_rate": 5.004421671772453e-06, "loss": 17.4102, "step": 28160 }, { "epoch": 0.5147604511305683, "grad_norm": 6.6435775706349505, "learning_rate": 5.00412566034263e-06, "loss": 17.6286, "step": 28161 }, { "epoch": 0.5147787303270148, "grad_norm": 5.318601132790432, "learning_rate": 5.003829648898347e-06, "loss": 17.1369, "step": 28162 }, { "epoch": 0.5147970095234613, "grad_norm": 5.851364989000181, "learning_rate": 5.003533637440643e-06, "loss": 17.1015, "step": 28163 }, { "epoch": 0.5148152887199079, "grad_norm": 5.97345678470819, "learning_rate": 5.003237625970554e-06, "loss": 17.3515, "step": 28164 }, { "epoch": 0.5148335679163544, "grad_norm": 4.319079791050368, "learning_rate": 5.002941614489117e-06, "loss": 16.7136, "step": 28165 }, { "epoch": 0.514851847112801, "grad_norm": 6.030998116732727, "learning_rate": 5.0026456029973705e-06, "loss": 17.2514, "step": 28166 }, { "epoch": 0.5148701263092474, "grad_norm": 7.039669797362035, "learning_rate": 5.002349591496349e-06, "loss": 17.6927, "step": 28167 }, { "epoch": 0.5148884055056939, "grad_norm": 7.865756664491041, "learning_rate": 5.002053579987095e-06, "loss": 17.9537, "step": 28168 }, { "epoch": 0.5149066847021405, "grad_norm": 6.962113674781745, "learning_rate": 5.001757568470642e-06, "loss": 17.5078, "step": 28169 }, { "epoch": 0.514924963898587, "grad_norm": 6.504396415023693, "learning_rate": 5.00146155694803e-06, "loss": 17.4157, "step": 28170 }, { "epoch": 0.5149432430950336, "grad_norm": 5.863818599382739, "learning_rate": 5.001165545420293e-06, "loss": 17.2288, "step": 28171 }, { "epoch": 0.5149615222914801, "grad_norm": 6.319205856870767, "learning_rate": 5.0008695338884725e-06, "loss": 17.4342, "step": 28172 }, { "epoch": 0.5149798014879265, "grad_norm": 5.943042318046097, "learning_rate": 5.000573522353604e-06, "loss": 17.356, "step": 28173 }, { "epoch": 0.5149980806843731, "grad_norm": 6.129856488068772, "learning_rate": 5.000277510816728e-06, "loss": 17.3474, "step": 28174 }, { "epoch": 0.5150163598808196, "grad_norm": 5.945116040970834, "learning_rate": 4.999981499278876e-06, "loss": 17.1864, "step": 28175 }, { "epoch": 0.5150346390772662, "grad_norm": 8.265559736040654, "learning_rate": 4.9996854877410905e-06, "loss": 17.8946, "step": 28176 }, { "epoch": 0.5150529182737127, "grad_norm": 6.437334872176177, "learning_rate": 4.999389476204406e-06, "loss": 17.5352, "step": 28177 }, { "epoch": 0.5150711974701592, "grad_norm": 5.419875494669513, "learning_rate": 4.999093464669863e-06, "loss": 16.9175, "step": 28178 }, { "epoch": 0.5150894766666058, "grad_norm": 6.770252063586818, "learning_rate": 4.998797453138496e-06, "loss": 17.6959, "step": 28179 }, { "epoch": 0.5151077558630522, "grad_norm": 5.81401931216123, "learning_rate": 4.998501441611343e-06, "loss": 17.1223, "step": 28180 }, { "epoch": 0.5151260350594988, "grad_norm": 6.229354007716394, "learning_rate": 4.998205430089445e-06, "loss": 17.4315, "step": 28181 }, { "epoch": 0.5151443142559453, "grad_norm": 6.00189037519563, "learning_rate": 4.9979094185738344e-06, "loss": 17.3048, "step": 28182 }, { "epoch": 0.5151625934523918, "grad_norm": 6.072229788591644, "learning_rate": 4.997613407065552e-06, "loss": 17.2411, "step": 28183 }, { "epoch": 0.5151808726488384, "grad_norm": 4.97116775481753, "learning_rate": 4.997317395565635e-06, "loss": 16.9966, "step": 28184 }, { "epoch": 0.5151991518452849, "grad_norm": 5.731876597442955, "learning_rate": 4.9970213840751185e-06, "loss": 17.3688, "step": 28185 }, { "epoch": 0.5152174310417315, "grad_norm": 6.139715199829413, "learning_rate": 4.996725372595044e-06, "loss": 17.1598, "step": 28186 }, { "epoch": 0.5152357102381779, "grad_norm": 6.436362192283496, "learning_rate": 4.996429361126447e-06, "loss": 17.4544, "step": 28187 }, { "epoch": 0.5152539894346244, "grad_norm": 7.375691130758968, "learning_rate": 4.996133349670362e-06, "loss": 17.7593, "step": 28188 }, { "epoch": 0.515272268631071, "grad_norm": 6.085832382220902, "learning_rate": 4.995837338227832e-06, "loss": 17.4433, "step": 28189 }, { "epoch": 0.5152905478275175, "grad_norm": 6.663826732315967, "learning_rate": 4.9955413267998905e-06, "loss": 17.5097, "step": 28190 }, { "epoch": 0.5153088270239641, "grad_norm": 5.015545658554212, "learning_rate": 4.995245315387575e-06, "loss": 16.7537, "step": 28191 }, { "epoch": 0.5153271062204106, "grad_norm": 5.838505174851261, "learning_rate": 4.994949303991928e-06, "loss": 17.252, "step": 28192 }, { "epoch": 0.515345385416857, "grad_norm": 8.249928955669725, "learning_rate": 4.9946532926139805e-06, "loss": 18.2775, "step": 28193 }, { "epoch": 0.5153636646133036, "grad_norm": 6.7158528747227955, "learning_rate": 4.994357281254772e-06, "loss": 17.6138, "step": 28194 }, { "epoch": 0.5153819438097501, "grad_norm": 5.824144166676193, "learning_rate": 4.994061269915343e-06, "loss": 17.2477, "step": 28195 }, { "epoch": 0.5154002230061967, "grad_norm": 5.463913434287644, "learning_rate": 4.993765258596728e-06, "loss": 17.3145, "step": 28196 }, { "epoch": 0.5154185022026432, "grad_norm": 6.420409486988014, "learning_rate": 4.993469247299964e-06, "loss": 17.47, "step": 28197 }, { "epoch": 0.5154367813990897, "grad_norm": 5.579108004881442, "learning_rate": 4.993173236026091e-06, "loss": 17.1566, "step": 28198 }, { "epoch": 0.5154550605955363, "grad_norm": 6.457916538417718, "learning_rate": 4.9928772247761435e-06, "loss": 17.6163, "step": 28199 }, { "epoch": 0.5154733397919827, "grad_norm": 6.571864744552564, "learning_rate": 4.992581213551163e-06, "loss": 17.651, "step": 28200 }, { "epoch": 0.5154916189884293, "grad_norm": 6.43539111382244, "learning_rate": 4.992285202352184e-06, "loss": 17.5781, "step": 28201 }, { "epoch": 0.5155098981848758, "grad_norm": 5.1435792108081895, "learning_rate": 4.9919891911802445e-06, "loss": 17.0905, "step": 28202 }, { "epoch": 0.5155281773813223, "grad_norm": 7.134187654591497, "learning_rate": 4.991693180036382e-06, "loss": 17.9438, "step": 28203 }, { "epoch": 0.5155464565777689, "grad_norm": 6.490637936095741, "learning_rate": 4.9913971689216355e-06, "loss": 17.351, "step": 28204 }, { "epoch": 0.5155647357742154, "grad_norm": 7.956565135064702, "learning_rate": 4.991101157837038e-06, "loss": 17.9674, "step": 28205 }, { "epoch": 0.5155830149706618, "grad_norm": 6.678808826906036, "learning_rate": 4.990805146783633e-06, "loss": 17.649, "step": 28206 }, { "epoch": 0.5156012941671084, "grad_norm": 6.412975551902454, "learning_rate": 4.990509135762455e-06, "loss": 17.5338, "step": 28207 }, { "epoch": 0.5156195733635549, "grad_norm": 8.295048085068535, "learning_rate": 4.9902131247745395e-06, "loss": 17.9047, "step": 28208 }, { "epoch": 0.5156378525600015, "grad_norm": 6.91658108633665, "learning_rate": 4.989917113820928e-06, "loss": 17.6383, "step": 28209 }, { "epoch": 0.515656131756448, "grad_norm": 5.576102802165591, "learning_rate": 4.989621102902658e-06, "loss": 17.3871, "step": 28210 }, { "epoch": 0.5156744109528945, "grad_norm": 8.235191717878612, "learning_rate": 4.9893250920207606e-06, "loss": 18.031, "step": 28211 }, { "epoch": 0.515692690149341, "grad_norm": 5.481930958457845, "learning_rate": 4.98902908117628e-06, "loss": 17.0455, "step": 28212 }, { "epoch": 0.5157109693457875, "grad_norm": 6.837430098374462, "learning_rate": 4.988733070370251e-06, "loss": 17.7349, "step": 28213 }, { "epoch": 0.5157292485422341, "grad_norm": 7.129773801985469, "learning_rate": 4.988437059603713e-06, "loss": 17.821, "step": 28214 }, { "epoch": 0.5157475277386806, "grad_norm": 8.401615798316545, "learning_rate": 4.988141048877703e-06, "loss": 18.8932, "step": 28215 }, { "epoch": 0.5157658069351271, "grad_norm": 6.682211248851416, "learning_rate": 4.987845038193254e-06, "loss": 17.6356, "step": 28216 }, { "epoch": 0.5157840861315737, "grad_norm": 6.583499921389675, "learning_rate": 4.987549027551409e-06, "loss": 17.3783, "step": 28217 }, { "epoch": 0.5158023653280202, "grad_norm": 5.521369248951098, "learning_rate": 4.987253016953205e-06, "loss": 17.1592, "step": 28218 }, { "epoch": 0.5158206445244667, "grad_norm": 6.762331827717847, "learning_rate": 4.986957006399675e-06, "loss": 17.2717, "step": 28219 }, { "epoch": 0.5158389237209132, "grad_norm": 6.7441140402099, "learning_rate": 4.986660995891862e-06, "loss": 17.633, "step": 28220 }, { "epoch": 0.5158572029173597, "grad_norm": 7.605188296975337, "learning_rate": 4.986364985430801e-06, "loss": 17.9664, "step": 28221 }, { "epoch": 0.5158754821138063, "grad_norm": 5.954533950344529, "learning_rate": 4.986068975017527e-06, "loss": 17.1513, "step": 28222 }, { "epoch": 0.5158937613102528, "grad_norm": 5.19384164719807, "learning_rate": 4.985772964653083e-06, "loss": 17.0377, "step": 28223 }, { "epoch": 0.5159120405066994, "grad_norm": 5.709840000751817, "learning_rate": 4.985476954338504e-06, "loss": 17.0539, "step": 28224 }, { "epoch": 0.5159303197031458, "grad_norm": 5.467003263646111, "learning_rate": 4.985180944074824e-06, "loss": 17.1586, "step": 28225 }, { "epoch": 0.5159485988995923, "grad_norm": 6.818836571595896, "learning_rate": 4.984884933863085e-06, "loss": 17.7478, "step": 28226 }, { "epoch": 0.5159668780960389, "grad_norm": 6.905771052252174, "learning_rate": 4.984588923704323e-06, "loss": 18.1019, "step": 28227 }, { "epoch": 0.5159851572924854, "grad_norm": 5.661404374163345, "learning_rate": 4.984292913599575e-06, "loss": 17.0274, "step": 28228 }, { "epoch": 0.516003436488932, "grad_norm": 7.014521349377153, "learning_rate": 4.983996903549881e-06, "loss": 17.7912, "step": 28229 }, { "epoch": 0.5160217156853785, "grad_norm": 6.232023919122138, "learning_rate": 4.983700893556273e-06, "loss": 17.6686, "step": 28230 }, { "epoch": 0.516039994881825, "grad_norm": 7.890911013477252, "learning_rate": 4.983404883619794e-06, "loss": 17.769, "step": 28231 }, { "epoch": 0.5160582740782715, "grad_norm": 6.872587574323388, "learning_rate": 4.98310887374148e-06, "loss": 17.5923, "step": 28232 }, { "epoch": 0.516076553274718, "grad_norm": 5.201490169639113, "learning_rate": 4.982812863922366e-06, "loss": 17.0947, "step": 28233 }, { "epoch": 0.5160948324711646, "grad_norm": 5.763744767295851, "learning_rate": 4.982516854163494e-06, "loss": 17.3743, "step": 28234 }, { "epoch": 0.5161131116676111, "grad_norm": 6.914334473628715, "learning_rate": 4.982220844465897e-06, "loss": 18.0751, "step": 28235 }, { "epoch": 0.5161313908640576, "grad_norm": 6.620543844060205, "learning_rate": 4.981924834830614e-06, "loss": 17.7356, "step": 28236 }, { "epoch": 0.5161496700605042, "grad_norm": 6.557555013743278, "learning_rate": 4.9816288252586844e-06, "loss": 17.4938, "step": 28237 }, { "epoch": 0.5161679492569506, "grad_norm": 6.9004998332954015, "learning_rate": 4.981332815751144e-06, "loss": 17.7065, "step": 28238 }, { "epoch": 0.5161862284533972, "grad_norm": 6.3165920019418875, "learning_rate": 4.98103680630903e-06, "loss": 17.3589, "step": 28239 }, { "epoch": 0.5162045076498437, "grad_norm": 6.509015542500228, "learning_rate": 4.98074079693338e-06, "loss": 17.6352, "step": 28240 }, { "epoch": 0.5162227868462902, "grad_norm": 7.113184527318696, "learning_rate": 4.980444787625233e-06, "loss": 17.6092, "step": 28241 }, { "epoch": 0.5162410660427368, "grad_norm": 6.14557656788362, "learning_rate": 4.980148778385623e-06, "loss": 17.1574, "step": 28242 }, { "epoch": 0.5162593452391833, "grad_norm": 7.213181570738609, "learning_rate": 4.9798527692155915e-06, "loss": 18.0375, "step": 28243 }, { "epoch": 0.5162776244356299, "grad_norm": 5.639323384051671, "learning_rate": 4.9795567601161735e-06, "loss": 17.1832, "step": 28244 }, { "epoch": 0.5162959036320763, "grad_norm": 6.137465459690317, "learning_rate": 4.979260751088409e-06, "loss": 17.5277, "step": 28245 }, { "epoch": 0.5163141828285228, "grad_norm": 7.096317856002897, "learning_rate": 4.9789647421333335e-06, "loss": 17.6459, "step": 28246 }, { "epoch": 0.5163324620249694, "grad_norm": 6.791427737996549, "learning_rate": 4.978668733251982e-06, "loss": 17.8373, "step": 28247 }, { "epoch": 0.5163507412214159, "grad_norm": 10.550594804003643, "learning_rate": 4.978372724445397e-06, "loss": 18.8802, "step": 28248 }, { "epoch": 0.5163690204178625, "grad_norm": 5.836382127207683, "learning_rate": 4.978076715714614e-06, "loss": 17.6371, "step": 28249 }, { "epoch": 0.516387299614309, "grad_norm": 6.107905509674293, "learning_rate": 4.977780707060668e-06, "loss": 17.2012, "step": 28250 }, { "epoch": 0.5164055788107554, "grad_norm": 6.893817051738435, "learning_rate": 4.977484698484602e-06, "loss": 17.5091, "step": 28251 }, { "epoch": 0.516423858007202, "grad_norm": 5.507606081739157, "learning_rate": 4.9771886899874485e-06, "loss": 17.087, "step": 28252 }, { "epoch": 0.5164421372036485, "grad_norm": 5.895304799896382, "learning_rate": 4.976892681570246e-06, "loss": 17.3784, "step": 28253 }, { "epoch": 0.5164604164000951, "grad_norm": 6.530205478059318, "learning_rate": 4.9765966732340335e-06, "loss": 17.5561, "step": 28254 }, { "epoch": 0.5164786955965416, "grad_norm": 6.872070323979689, "learning_rate": 4.9763006649798485e-06, "loss": 17.7354, "step": 28255 }, { "epoch": 0.5164969747929881, "grad_norm": 6.304183298980531, "learning_rate": 4.976004656808725e-06, "loss": 17.3701, "step": 28256 }, { "epoch": 0.5165152539894347, "grad_norm": 6.000271605623386, "learning_rate": 4.975708648721705e-06, "loss": 17.3946, "step": 28257 }, { "epoch": 0.5165335331858811, "grad_norm": 6.491954323026376, "learning_rate": 4.975412640719825e-06, "loss": 17.6466, "step": 28258 }, { "epoch": 0.5165518123823277, "grad_norm": 5.061908246575599, "learning_rate": 4.975116632804119e-06, "loss": 16.9296, "step": 28259 }, { "epoch": 0.5165700915787742, "grad_norm": 6.976247778399507, "learning_rate": 4.974820624975629e-06, "loss": 17.6008, "step": 28260 }, { "epoch": 0.5165883707752207, "grad_norm": 6.742763888324338, "learning_rate": 4.974524617235389e-06, "loss": 17.6102, "step": 28261 }, { "epoch": 0.5166066499716673, "grad_norm": 6.6387729698208275, "learning_rate": 4.974228609584438e-06, "loss": 17.6382, "step": 28262 }, { "epoch": 0.5166249291681138, "grad_norm": 6.2449695980180415, "learning_rate": 4.973932602023816e-06, "loss": 17.5073, "step": 28263 }, { "epoch": 0.5166432083645603, "grad_norm": 6.450636685086195, "learning_rate": 4.973636594554555e-06, "loss": 17.3292, "step": 28264 }, { "epoch": 0.5166614875610068, "grad_norm": 7.9278652694344975, "learning_rate": 4.973340587177698e-06, "loss": 18.0148, "step": 28265 }, { "epoch": 0.5166797667574533, "grad_norm": 7.312880169912325, "learning_rate": 4.9730445798942784e-06, "loss": 17.8211, "step": 28266 }, { "epoch": 0.5166980459538999, "grad_norm": 5.976868121343258, "learning_rate": 4.972748572705334e-06, "loss": 17.2071, "step": 28267 }, { "epoch": 0.5167163251503464, "grad_norm": 6.468444778454015, "learning_rate": 4.972452565611906e-06, "loss": 17.235, "step": 28268 }, { "epoch": 0.516734604346793, "grad_norm": 5.1413800935468235, "learning_rate": 4.9721565586150295e-06, "loss": 16.8773, "step": 28269 }, { "epoch": 0.5167528835432394, "grad_norm": 7.270722378199894, "learning_rate": 4.971860551715739e-06, "loss": 17.472, "step": 28270 }, { "epoch": 0.5167711627396859, "grad_norm": 7.904071788550602, "learning_rate": 4.971564544915077e-06, "loss": 18.0134, "step": 28271 }, { "epoch": 0.5167894419361325, "grad_norm": 5.7469197328987995, "learning_rate": 4.971268538214079e-06, "loss": 17.2567, "step": 28272 }, { "epoch": 0.516807721132579, "grad_norm": 5.030550165373328, "learning_rate": 4.97097253161378e-06, "loss": 16.8883, "step": 28273 }, { "epoch": 0.5168260003290255, "grad_norm": 6.871964276897015, "learning_rate": 4.970676525115223e-06, "loss": 17.7691, "step": 28274 }, { "epoch": 0.5168442795254721, "grad_norm": 4.606991548838814, "learning_rate": 4.970380518719439e-06, "loss": 16.7543, "step": 28275 }, { "epoch": 0.5168625587219186, "grad_norm": 5.56013897016377, "learning_rate": 4.97008451242747e-06, "loss": 17.3125, "step": 28276 }, { "epoch": 0.5168808379183651, "grad_norm": 6.780150570995449, "learning_rate": 4.969788506240354e-06, "loss": 17.4703, "step": 28277 }, { "epoch": 0.5168991171148116, "grad_norm": 6.311010318448981, "learning_rate": 4.9694925001591235e-06, "loss": 17.3542, "step": 28278 }, { "epoch": 0.5169173963112581, "grad_norm": 6.854886493991408, "learning_rate": 4.969196494184822e-06, "loss": 17.5825, "step": 28279 }, { "epoch": 0.5169356755077047, "grad_norm": 8.301282717597386, "learning_rate": 4.968900488318483e-06, "loss": 18.3161, "step": 28280 }, { "epoch": 0.5169539547041512, "grad_norm": 6.235898453284987, "learning_rate": 4.968604482561143e-06, "loss": 17.5212, "step": 28281 }, { "epoch": 0.5169722339005978, "grad_norm": 6.341026571008973, "learning_rate": 4.968308476913845e-06, "loss": 17.5553, "step": 28282 }, { "epoch": 0.5169905130970442, "grad_norm": 7.778073061671004, "learning_rate": 4.968012471377623e-06, "loss": 18.2273, "step": 28283 }, { "epoch": 0.5170087922934907, "grad_norm": 5.256761677275139, "learning_rate": 4.967716465953512e-06, "loss": 17.1107, "step": 28284 }, { "epoch": 0.5170270714899373, "grad_norm": 5.232657715473453, "learning_rate": 4.967420460642553e-06, "loss": 17.0836, "step": 28285 }, { "epoch": 0.5170453506863838, "grad_norm": 6.372387967753305, "learning_rate": 4.967124455445783e-06, "loss": 17.3634, "step": 28286 }, { "epoch": 0.5170636298828304, "grad_norm": 7.721831486024882, "learning_rate": 4.966828450364238e-06, "loss": 17.7065, "step": 28287 }, { "epoch": 0.5170819090792769, "grad_norm": 6.690076463553113, "learning_rate": 4.966532445398958e-06, "loss": 17.6673, "step": 28288 }, { "epoch": 0.5171001882757233, "grad_norm": 7.281341350508655, "learning_rate": 4.966236440550977e-06, "loss": 17.7145, "step": 28289 }, { "epoch": 0.5171184674721699, "grad_norm": 5.559529328863653, "learning_rate": 4.965940435821334e-06, "loss": 17.0352, "step": 28290 }, { "epoch": 0.5171367466686164, "grad_norm": 6.753979170901499, "learning_rate": 4.965644431211069e-06, "loss": 17.2227, "step": 28291 }, { "epoch": 0.517155025865063, "grad_norm": 6.501264889097688, "learning_rate": 4.9653484267212145e-06, "loss": 17.5962, "step": 28292 }, { "epoch": 0.5171733050615095, "grad_norm": 8.685964218239112, "learning_rate": 4.965052422352814e-06, "loss": 18.3715, "step": 28293 }, { "epoch": 0.517191584257956, "grad_norm": 6.520037121411414, "learning_rate": 4.9647564181069e-06, "loss": 17.9027, "step": 28294 }, { "epoch": 0.5172098634544026, "grad_norm": 5.006900722549799, "learning_rate": 4.9644604139845106e-06, "loss": 17.0317, "step": 28295 }, { "epoch": 0.517228142650849, "grad_norm": 6.79245027887876, "learning_rate": 4.964164409986687e-06, "loss": 17.8067, "step": 28296 }, { "epoch": 0.5172464218472956, "grad_norm": 6.425147652690195, "learning_rate": 4.963868406114463e-06, "loss": 17.0715, "step": 28297 }, { "epoch": 0.5172647010437421, "grad_norm": 7.592396400924471, "learning_rate": 4.963572402368877e-06, "loss": 17.6252, "step": 28298 }, { "epoch": 0.5172829802401886, "grad_norm": 4.9459803310968, "learning_rate": 4.9632763987509656e-06, "loss": 16.8748, "step": 28299 }, { "epoch": 0.5173012594366352, "grad_norm": 5.637228125645112, "learning_rate": 4.962980395261769e-06, "loss": 17.0424, "step": 28300 }, { "epoch": 0.5173195386330817, "grad_norm": 6.5955010881627985, "learning_rate": 4.96268439190232e-06, "loss": 17.5066, "step": 28301 }, { "epoch": 0.5173378178295283, "grad_norm": 7.152268273930854, "learning_rate": 4.962388388673661e-06, "loss": 17.9691, "step": 28302 }, { "epoch": 0.5173560970259747, "grad_norm": 6.301281888975413, "learning_rate": 4.962092385576828e-06, "loss": 17.3616, "step": 28303 }, { "epoch": 0.5173743762224212, "grad_norm": 7.623465819388862, "learning_rate": 4.961796382612857e-06, "loss": 18.2055, "step": 28304 }, { "epoch": 0.5173926554188678, "grad_norm": 6.844232173095886, "learning_rate": 4.961500379782787e-06, "loss": 17.559, "step": 28305 }, { "epoch": 0.5174109346153143, "grad_norm": 5.103228395683297, "learning_rate": 4.961204377087654e-06, "loss": 17.0642, "step": 28306 }, { "epoch": 0.5174292138117609, "grad_norm": 5.862556345814915, "learning_rate": 4.9609083745284955e-06, "loss": 17.403, "step": 28307 }, { "epoch": 0.5174474930082074, "grad_norm": 6.84776664844964, "learning_rate": 4.960612372106352e-06, "loss": 17.7825, "step": 28308 }, { "epoch": 0.5174657722046538, "grad_norm": 7.289848890162367, "learning_rate": 4.9603163698222565e-06, "loss": 17.8208, "step": 28309 }, { "epoch": 0.5174840514011004, "grad_norm": 5.779740391739088, "learning_rate": 4.960020367677251e-06, "loss": 17.2748, "step": 28310 }, { "epoch": 0.5175023305975469, "grad_norm": 6.937638054976089, "learning_rate": 4.959724365672369e-06, "loss": 17.5835, "step": 28311 }, { "epoch": 0.5175206097939935, "grad_norm": 5.821591776952739, "learning_rate": 4.95942836380865e-06, "loss": 17.0297, "step": 28312 }, { "epoch": 0.51753888899044, "grad_norm": 7.870391038015023, "learning_rate": 4.959132362087131e-06, "loss": 17.5907, "step": 28313 }, { "epoch": 0.5175571681868865, "grad_norm": 5.963559402433707, "learning_rate": 4.958836360508851e-06, "loss": 17.0072, "step": 28314 }, { "epoch": 0.517575447383333, "grad_norm": 6.138604551563362, "learning_rate": 4.958540359074843e-06, "loss": 17.4719, "step": 28315 }, { "epoch": 0.5175937265797795, "grad_norm": 5.55085953010945, "learning_rate": 4.958244357786149e-06, "loss": 17.0418, "step": 28316 }, { "epoch": 0.5176120057762261, "grad_norm": 6.216162786051773, "learning_rate": 4.957948356643806e-06, "loss": 17.5679, "step": 28317 }, { "epoch": 0.5176302849726726, "grad_norm": 6.1428693500892, "learning_rate": 4.9576523556488485e-06, "loss": 17.5301, "step": 28318 }, { "epoch": 0.5176485641691191, "grad_norm": 5.828606178745438, "learning_rate": 4.957356354802318e-06, "loss": 17.1704, "step": 28319 }, { "epoch": 0.5176668433655657, "grad_norm": 7.199583396032562, "learning_rate": 4.957060354105247e-06, "loss": 17.832, "step": 28320 }, { "epoch": 0.5176851225620122, "grad_norm": 6.743918145778942, "learning_rate": 4.956764353558677e-06, "loss": 17.7326, "step": 28321 }, { "epoch": 0.5177034017584587, "grad_norm": 6.830053726994922, "learning_rate": 4.956468353163646e-06, "loss": 17.6931, "step": 28322 }, { "epoch": 0.5177216809549052, "grad_norm": 5.917704872181462, "learning_rate": 4.956172352921186e-06, "loss": 17.2226, "step": 28323 }, { "epoch": 0.5177399601513517, "grad_norm": 5.256594909885979, "learning_rate": 4.955876352832342e-06, "loss": 17.0001, "step": 28324 }, { "epoch": 0.5177582393477983, "grad_norm": 5.406433740918442, "learning_rate": 4.955580352898145e-06, "loss": 17.2431, "step": 28325 }, { "epoch": 0.5177765185442448, "grad_norm": 5.8918516094395965, "learning_rate": 4.955284353119635e-06, "loss": 17.2941, "step": 28326 }, { "epoch": 0.5177947977406914, "grad_norm": 5.17406305535719, "learning_rate": 4.954988353497851e-06, "loss": 16.8681, "step": 28327 }, { "epoch": 0.5178130769371379, "grad_norm": 5.742955895787697, "learning_rate": 4.954692354033829e-06, "loss": 17.2397, "step": 28328 }, { "epoch": 0.5178313561335843, "grad_norm": 7.038128444420002, "learning_rate": 4.954396354728604e-06, "loss": 17.6647, "step": 28329 }, { "epoch": 0.5178496353300309, "grad_norm": 7.029265376873107, "learning_rate": 4.954100355583217e-06, "loss": 18.0109, "step": 28330 }, { "epoch": 0.5178679145264774, "grad_norm": 5.6742373272957884, "learning_rate": 4.953804356598706e-06, "loss": 17.1585, "step": 28331 }, { "epoch": 0.517886193722924, "grad_norm": 5.766574209184498, "learning_rate": 4.953508357776104e-06, "loss": 17.4039, "step": 28332 }, { "epoch": 0.5179044729193705, "grad_norm": 6.3053895933968525, "learning_rate": 4.953212359116453e-06, "loss": 17.4309, "step": 28333 }, { "epoch": 0.517922752115817, "grad_norm": 7.950056890711074, "learning_rate": 4.9529163606207884e-06, "loss": 17.8308, "step": 28334 }, { "epoch": 0.5179410313122635, "grad_norm": 7.35631073458575, "learning_rate": 4.952620362290146e-06, "loss": 17.8463, "step": 28335 }, { "epoch": 0.51795931050871, "grad_norm": 5.043105007380679, "learning_rate": 4.952324364125567e-06, "loss": 16.6986, "step": 28336 }, { "epoch": 0.5179775897051566, "grad_norm": 4.6970956307817815, "learning_rate": 4.952028366128086e-06, "loss": 16.8207, "step": 28337 }, { "epoch": 0.5179958689016031, "grad_norm": 7.161634770000542, "learning_rate": 4.951732368298743e-06, "loss": 17.5895, "step": 28338 }, { "epoch": 0.5180141480980496, "grad_norm": 6.896067591252059, "learning_rate": 4.951436370638572e-06, "loss": 17.2049, "step": 28339 }, { "epoch": 0.5180324272944962, "grad_norm": 5.907005539784071, "learning_rate": 4.951140373148613e-06, "loss": 17.4387, "step": 28340 }, { "epoch": 0.5180507064909426, "grad_norm": 6.669907739938093, "learning_rate": 4.950844375829903e-06, "loss": 17.532, "step": 28341 }, { "epoch": 0.5180689856873891, "grad_norm": 6.386297552721186, "learning_rate": 4.9505483786834804e-06, "loss": 17.4779, "step": 28342 }, { "epoch": 0.5180872648838357, "grad_norm": 6.555596657950641, "learning_rate": 4.950252381710379e-06, "loss": 17.1734, "step": 28343 }, { "epoch": 0.5181055440802822, "grad_norm": 8.248701041644166, "learning_rate": 4.94995638491164e-06, "loss": 18.5298, "step": 28344 }, { "epoch": 0.5181238232767288, "grad_norm": 5.926592105592358, "learning_rate": 4.9496603882883005e-06, "loss": 17.32, "step": 28345 }, { "epoch": 0.5181421024731753, "grad_norm": 7.876904087450707, "learning_rate": 4.949364391841395e-06, "loss": 17.803, "step": 28346 }, { "epoch": 0.5181603816696218, "grad_norm": 8.87604005395616, "learning_rate": 4.9490683955719645e-06, "loss": 18.6382, "step": 28347 }, { "epoch": 0.5181786608660683, "grad_norm": 6.327237224683834, "learning_rate": 4.948772399481044e-06, "loss": 17.1739, "step": 28348 }, { "epoch": 0.5181969400625148, "grad_norm": 7.09737098314194, "learning_rate": 4.9484764035696705e-06, "loss": 17.6734, "step": 28349 }, { "epoch": 0.5182152192589614, "grad_norm": 12.50251472700511, "learning_rate": 4.9481804078388854e-06, "loss": 17.6391, "step": 28350 }, { "epoch": 0.5182334984554079, "grad_norm": 5.556826248447641, "learning_rate": 4.94788441228972e-06, "loss": 17.31, "step": 28351 }, { "epoch": 0.5182517776518544, "grad_norm": 6.169112154080561, "learning_rate": 4.9475884169232195e-06, "loss": 17.3714, "step": 28352 }, { "epoch": 0.518270056848301, "grad_norm": 6.149369658046421, "learning_rate": 4.947292421740415e-06, "loss": 17.459, "step": 28353 }, { "epoch": 0.5182883360447474, "grad_norm": 7.25027977926675, "learning_rate": 4.9469964267423445e-06, "loss": 17.6783, "step": 28354 }, { "epoch": 0.518306615241194, "grad_norm": 5.705919113560786, "learning_rate": 4.946700431930049e-06, "loss": 17.2594, "step": 28355 }, { "epoch": 0.5183248944376405, "grad_norm": 7.103057322642434, "learning_rate": 4.946404437304565e-06, "loss": 17.7569, "step": 28356 }, { "epoch": 0.518343173634087, "grad_norm": 6.324290224428657, "learning_rate": 4.946108442866925e-06, "loss": 17.4654, "step": 28357 }, { "epoch": 0.5183614528305336, "grad_norm": 5.6742389865285245, "learning_rate": 4.945812448618173e-06, "loss": 17.2231, "step": 28358 }, { "epoch": 0.5183797320269801, "grad_norm": 6.67351229223035, "learning_rate": 4.945516454559343e-06, "loss": 17.6726, "step": 28359 }, { "epoch": 0.5183980112234267, "grad_norm": 5.985127165617485, "learning_rate": 4.945220460691473e-06, "loss": 17.2501, "step": 28360 }, { "epoch": 0.5184162904198731, "grad_norm": 6.790971164288386, "learning_rate": 4.944924467015601e-06, "loss": 17.6443, "step": 28361 }, { "epoch": 0.5184345696163196, "grad_norm": 5.156384856233661, "learning_rate": 4.944628473532763e-06, "loss": 17.082, "step": 28362 }, { "epoch": 0.5184528488127662, "grad_norm": 5.804276280583565, "learning_rate": 4.9443324802439975e-06, "loss": 17.3728, "step": 28363 }, { "epoch": 0.5184711280092127, "grad_norm": 6.887342527283665, "learning_rate": 4.944036487150343e-06, "loss": 18.011, "step": 28364 }, { "epoch": 0.5184894072056593, "grad_norm": 4.909795921075503, "learning_rate": 4.943740494252835e-06, "loss": 16.7328, "step": 28365 }, { "epoch": 0.5185076864021058, "grad_norm": 5.793876673440153, "learning_rate": 4.94344450155251e-06, "loss": 17.0952, "step": 28366 }, { "epoch": 0.5185259655985522, "grad_norm": 5.643399042905578, "learning_rate": 4.94314850905041e-06, "loss": 17.1263, "step": 28367 }, { "epoch": 0.5185442447949988, "grad_norm": 6.337080276201196, "learning_rate": 4.942852516747567e-06, "loss": 17.6168, "step": 28368 }, { "epoch": 0.5185625239914453, "grad_norm": 4.43384728185298, "learning_rate": 4.942556524645023e-06, "loss": 16.618, "step": 28369 }, { "epoch": 0.5185808031878919, "grad_norm": 6.31907727904341, "learning_rate": 4.942260532743813e-06, "loss": 17.5743, "step": 28370 }, { "epoch": 0.5185990823843384, "grad_norm": 6.269384186412205, "learning_rate": 4.9419645410449735e-06, "loss": 17.3451, "step": 28371 }, { "epoch": 0.5186173615807849, "grad_norm": 6.723354654080379, "learning_rate": 4.9416685495495454e-06, "loss": 17.5516, "step": 28372 }, { "epoch": 0.5186356407772315, "grad_norm": 5.644872574713539, "learning_rate": 4.941372558258564e-06, "loss": 17.238, "step": 28373 }, { "epoch": 0.5186539199736779, "grad_norm": 8.335516295278675, "learning_rate": 4.941076567173064e-06, "loss": 17.8968, "step": 28374 }, { "epoch": 0.5186721991701245, "grad_norm": 7.392830132433686, "learning_rate": 4.940780576294087e-06, "loss": 17.701, "step": 28375 }, { "epoch": 0.518690478366571, "grad_norm": 5.7589566603408615, "learning_rate": 4.94048458562267e-06, "loss": 17.3986, "step": 28376 }, { "epoch": 0.5187087575630175, "grad_norm": 6.828373995474195, "learning_rate": 4.940188595159848e-06, "loss": 17.8689, "step": 28377 }, { "epoch": 0.5187270367594641, "grad_norm": 6.074302009423401, "learning_rate": 4.939892604906661e-06, "loss": 17.3894, "step": 28378 }, { "epoch": 0.5187453159559106, "grad_norm": 5.395728480787896, "learning_rate": 4.939596614864144e-06, "loss": 17.1342, "step": 28379 }, { "epoch": 0.5187635951523571, "grad_norm": 7.098541162613564, "learning_rate": 4.9393006250333345e-06, "loss": 17.6606, "step": 28380 }, { "epoch": 0.5187818743488036, "grad_norm": 6.19940468455215, "learning_rate": 4.939004635415274e-06, "loss": 17.1753, "step": 28381 }, { "epoch": 0.5188001535452501, "grad_norm": 5.574690490203115, "learning_rate": 4.938708646010994e-06, "loss": 17.4, "step": 28382 }, { "epoch": 0.5188184327416967, "grad_norm": 7.557797584270141, "learning_rate": 4.9384126568215374e-06, "loss": 18.0713, "step": 28383 }, { "epoch": 0.5188367119381432, "grad_norm": 5.5071146457900335, "learning_rate": 4.938116667847938e-06, "loss": 17.1174, "step": 28384 }, { "epoch": 0.5188549911345898, "grad_norm": 6.239175399354861, "learning_rate": 4.937820679091233e-06, "loss": 17.6445, "step": 28385 }, { "epoch": 0.5188732703310363, "grad_norm": 7.55498510327374, "learning_rate": 4.937524690552464e-06, "loss": 17.7631, "step": 28386 }, { "epoch": 0.5188915495274827, "grad_norm": 6.593389107555483, "learning_rate": 4.937228702232665e-06, "loss": 17.4847, "step": 28387 }, { "epoch": 0.5189098287239293, "grad_norm": 7.227918407171986, "learning_rate": 4.9369327141328715e-06, "loss": 18.1397, "step": 28388 }, { "epoch": 0.5189281079203758, "grad_norm": 7.193422353039811, "learning_rate": 4.936636726254125e-06, "loss": 17.5931, "step": 28389 }, { "epoch": 0.5189463871168224, "grad_norm": 6.521969771777463, "learning_rate": 4.936340738597462e-06, "loss": 17.5764, "step": 28390 }, { "epoch": 0.5189646663132689, "grad_norm": 6.147235511859382, "learning_rate": 4.936044751163917e-06, "loss": 17.5021, "step": 28391 }, { "epoch": 0.5189829455097154, "grad_norm": 4.990153548976036, "learning_rate": 4.9357487639545324e-06, "loss": 16.7748, "step": 28392 }, { "epoch": 0.519001224706162, "grad_norm": 5.926882449078139, "learning_rate": 4.935452776970341e-06, "loss": 17.1332, "step": 28393 }, { "epoch": 0.5190195039026084, "grad_norm": 8.228062422646142, "learning_rate": 4.935156790212381e-06, "loss": 18.3323, "step": 28394 }, { "epoch": 0.519037783099055, "grad_norm": 7.999383086747716, "learning_rate": 4.934860803681693e-06, "loss": 18.1071, "step": 28395 }, { "epoch": 0.5190560622955015, "grad_norm": 5.932543343354322, "learning_rate": 4.934564817379312e-06, "loss": 17.1878, "step": 28396 }, { "epoch": 0.519074341491948, "grad_norm": 5.780339840906645, "learning_rate": 4.934268831306274e-06, "loss": 17.0861, "step": 28397 }, { "epoch": 0.5190926206883946, "grad_norm": 5.877545939180377, "learning_rate": 4.9339728454636194e-06, "loss": 17.6133, "step": 28398 }, { "epoch": 0.519110899884841, "grad_norm": 6.929590337227377, "learning_rate": 4.933676859852383e-06, "loss": 18.037, "step": 28399 }, { "epoch": 0.5191291790812876, "grad_norm": 6.550692562746869, "learning_rate": 4.933380874473605e-06, "loss": 17.5974, "step": 28400 }, { "epoch": 0.5191474582777341, "grad_norm": 7.504334160162672, "learning_rate": 4.933084889328322e-06, "loss": 17.6812, "step": 28401 }, { "epoch": 0.5191657374741806, "grad_norm": 6.277629082262044, "learning_rate": 4.932788904417568e-06, "loss": 17.4072, "step": 28402 }, { "epoch": 0.5191840166706272, "grad_norm": 5.911238000245223, "learning_rate": 4.932492919742384e-06, "loss": 17.0953, "step": 28403 }, { "epoch": 0.5192022958670737, "grad_norm": 5.204696671213171, "learning_rate": 4.932196935303808e-06, "loss": 17.13, "step": 28404 }, { "epoch": 0.5192205750635203, "grad_norm": 5.964727338725323, "learning_rate": 4.931900951102873e-06, "loss": 17.5791, "step": 28405 }, { "epoch": 0.5192388542599667, "grad_norm": 6.512404584723284, "learning_rate": 4.931604967140622e-06, "loss": 17.5078, "step": 28406 }, { "epoch": 0.5192571334564132, "grad_norm": 4.910562497753882, "learning_rate": 4.9313089834180885e-06, "loss": 16.6523, "step": 28407 }, { "epoch": 0.5192754126528598, "grad_norm": 5.419494529033941, "learning_rate": 4.9310129999363095e-06, "loss": 17.4113, "step": 28408 }, { "epoch": 0.5192936918493063, "grad_norm": 7.950816500233542, "learning_rate": 4.930717016696327e-06, "loss": 17.9876, "step": 28409 }, { "epoch": 0.5193119710457528, "grad_norm": 5.0251182567233466, "learning_rate": 4.930421033699175e-06, "loss": 16.789, "step": 28410 }, { "epoch": 0.5193302502421994, "grad_norm": 7.994435891045464, "learning_rate": 4.930125050945889e-06, "loss": 18.0434, "step": 28411 }, { "epoch": 0.5193485294386458, "grad_norm": 6.410436598654899, "learning_rate": 4.929829068437509e-06, "loss": 17.6376, "step": 28412 }, { "epoch": 0.5193668086350924, "grad_norm": 5.663290499919785, "learning_rate": 4.929533086175072e-06, "loss": 17.2351, "step": 28413 }, { "epoch": 0.5193850878315389, "grad_norm": 8.776582529292156, "learning_rate": 4.9292371041596175e-06, "loss": 18.4499, "step": 28414 }, { "epoch": 0.5194033670279854, "grad_norm": 7.041122472572309, "learning_rate": 4.928941122392181e-06, "loss": 17.6232, "step": 28415 }, { "epoch": 0.519421646224432, "grad_norm": 5.731336055102409, "learning_rate": 4.928645140873797e-06, "loss": 17.2532, "step": 28416 }, { "epoch": 0.5194399254208785, "grad_norm": 5.400588933769152, "learning_rate": 4.928349159605506e-06, "loss": 17.0085, "step": 28417 }, { "epoch": 0.5194582046173251, "grad_norm": 7.707068384184171, "learning_rate": 4.928053178588347e-06, "loss": 17.8172, "step": 28418 }, { "epoch": 0.5194764838137715, "grad_norm": 7.437717160223488, "learning_rate": 4.9277571978233526e-06, "loss": 17.7059, "step": 28419 }, { "epoch": 0.519494763010218, "grad_norm": 5.959425894062813, "learning_rate": 4.927461217311566e-06, "loss": 17.3244, "step": 28420 }, { "epoch": 0.5195130422066646, "grad_norm": 5.688353393018982, "learning_rate": 4.92716523705402e-06, "loss": 17.136, "step": 28421 }, { "epoch": 0.5195313214031111, "grad_norm": 7.921413404049765, "learning_rate": 4.926869257051752e-06, "loss": 18.2318, "step": 28422 }, { "epoch": 0.5195496005995577, "grad_norm": 6.218524229888885, "learning_rate": 4.926573277305804e-06, "loss": 17.5586, "step": 28423 }, { "epoch": 0.5195678797960042, "grad_norm": 5.7370631366777385, "learning_rate": 4.926277297817209e-06, "loss": 17.2098, "step": 28424 }, { "epoch": 0.5195861589924506, "grad_norm": 6.801426371435264, "learning_rate": 4.925981318587005e-06, "loss": 17.5821, "step": 28425 }, { "epoch": 0.5196044381888972, "grad_norm": 6.884217335038451, "learning_rate": 4.9256853396162304e-06, "loss": 17.5537, "step": 28426 }, { "epoch": 0.5196227173853437, "grad_norm": 6.938959073811555, "learning_rate": 4.925389360905924e-06, "loss": 17.7079, "step": 28427 }, { "epoch": 0.5196409965817903, "grad_norm": 6.3840040994419835, "learning_rate": 4.925093382457118e-06, "loss": 17.6047, "step": 28428 }, { "epoch": 0.5196592757782368, "grad_norm": 5.493911569407914, "learning_rate": 4.924797404270854e-06, "loss": 17.1116, "step": 28429 }, { "epoch": 0.5196775549746833, "grad_norm": 6.806618236692073, "learning_rate": 4.92450142634817e-06, "loss": 17.8762, "step": 28430 }, { "epoch": 0.5196958341711299, "grad_norm": 6.596538830081444, "learning_rate": 4.924205448690101e-06, "loss": 17.4036, "step": 28431 }, { "epoch": 0.5197141133675763, "grad_norm": 6.1625061284571006, "learning_rate": 4.923909471297687e-06, "loss": 17.5077, "step": 28432 }, { "epoch": 0.5197323925640229, "grad_norm": 5.321676442797134, "learning_rate": 4.923613494171962e-06, "loss": 17.0904, "step": 28433 }, { "epoch": 0.5197506717604694, "grad_norm": 13.33556481310796, "learning_rate": 4.923317517313965e-06, "loss": 19.3512, "step": 28434 }, { "epoch": 0.5197689509569159, "grad_norm": 6.584603473348195, "learning_rate": 4.923021540724735e-06, "loss": 17.5803, "step": 28435 }, { "epoch": 0.5197872301533625, "grad_norm": 7.97873784956204, "learning_rate": 4.9227255644053056e-06, "loss": 17.7348, "step": 28436 }, { "epoch": 0.519805509349809, "grad_norm": 5.609333406609034, "learning_rate": 4.9224295883567185e-06, "loss": 17.2715, "step": 28437 }, { "epoch": 0.5198237885462555, "grad_norm": 6.283377946211638, "learning_rate": 4.922133612580009e-06, "loss": 17.4622, "step": 28438 }, { "epoch": 0.519842067742702, "grad_norm": 7.13110081138636, "learning_rate": 4.921837637076212e-06, "loss": 17.4589, "step": 28439 }, { "epoch": 0.5198603469391485, "grad_norm": 8.620049344265254, "learning_rate": 4.921541661846369e-06, "loss": 18.0009, "step": 28440 }, { "epoch": 0.5198786261355951, "grad_norm": 5.458156654294497, "learning_rate": 4.921245686891517e-06, "loss": 17.1189, "step": 28441 }, { "epoch": 0.5198969053320416, "grad_norm": 5.461605257200562, "learning_rate": 4.92094971221269e-06, "loss": 17.2679, "step": 28442 }, { "epoch": 0.5199151845284882, "grad_norm": 6.077526445575021, "learning_rate": 4.920653737810927e-06, "loss": 17.7665, "step": 28443 }, { "epoch": 0.5199334637249347, "grad_norm": 7.17578423592594, "learning_rate": 4.920357763687265e-06, "loss": 17.5324, "step": 28444 }, { "epoch": 0.5199517429213811, "grad_norm": 5.584351044297349, "learning_rate": 4.920061789842745e-06, "loss": 17.3489, "step": 28445 }, { "epoch": 0.5199700221178277, "grad_norm": 6.857244895720754, "learning_rate": 4.9197658162784015e-06, "loss": 17.819, "step": 28446 }, { "epoch": 0.5199883013142742, "grad_norm": 6.717729242088374, "learning_rate": 4.919469842995269e-06, "loss": 17.7314, "step": 28447 }, { "epoch": 0.5200065805107208, "grad_norm": 6.559535453075689, "learning_rate": 4.91917386999439e-06, "loss": 17.2234, "step": 28448 }, { "epoch": 0.5200248597071673, "grad_norm": 7.06374343637183, "learning_rate": 4.9188778972767996e-06, "loss": 17.3341, "step": 28449 }, { "epoch": 0.5200431389036138, "grad_norm": 5.7664473779447, "learning_rate": 4.918581924843534e-06, "loss": 17.1368, "step": 28450 }, { "epoch": 0.5200614181000603, "grad_norm": 5.871158813757223, "learning_rate": 4.9182859526956324e-06, "loss": 17.2258, "step": 28451 }, { "epoch": 0.5200796972965068, "grad_norm": 5.98428605017562, "learning_rate": 4.917989980834132e-06, "loss": 17.5082, "step": 28452 }, { "epoch": 0.5200979764929534, "grad_norm": 6.733955259138203, "learning_rate": 4.917694009260067e-06, "loss": 17.4531, "step": 28453 }, { "epoch": 0.5201162556893999, "grad_norm": 6.591630512321672, "learning_rate": 4.91739803797448e-06, "loss": 17.7313, "step": 28454 }, { "epoch": 0.5201345348858464, "grad_norm": 7.072901599462475, "learning_rate": 4.9171020669784065e-06, "loss": 17.7049, "step": 28455 }, { "epoch": 0.520152814082293, "grad_norm": 5.093333802295993, "learning_rate": 4.9168060962728795e-06, "loss": 17.0145, "step": 28456 }, { "epoch": 0.5201710932787394, "grad_norm": 6.398399630976842, "learning_rate": 4.916510125858942e-06, "loss": 17.7248, "step": 28457 }, { "epoch": 0.520189372475186, "grad_norm": 7.420138996746665, "learning_rate": 4.91621415573763e-06, "loss": 17.7913, "step": 28458 }, { "epoch": 0.5202076516716325, "grad_norm": 6.666231696793718, "learning_rate": 4.915918185909978e-06, "loss": 17.4272, "step": 28459 }, { "epoch": 0.520225930868079, "grad_norm": 6.8041289241111835, "learning_rate": 4.915622216377028e-06, "loss": 17.7081, "step": 28460 }, { "epoch": 0.5202442100645256, "grad_norm": 5.951706159791261, "learning_rate": 4.915326247139812e-06, "loss": 17.4126, "step": 28461 }, { "epoch": 0.5202624892609721, "grad_norm": 5.490469716778039, "learning_rate": 4.9150302781993715e-06, "loss": 17.169, "step": 28462 }, { "epoch": 0.5202807684574187, "grad_norm": 7.478933155531026, "learning_rate": 4.914734309556744e-06, "loss": 17.8273, "step": 28463 }, { "epoch": 0.5202990476538651, "grad_norm": 5.376059672743149, "learning_rate": 4.914438341212963e-06, "loss": 17.1553, "step": 28464 }, { "epoch": 0.5203173268503116, "grad_norm": 6.858406977413497, "learning_rate": 4.91414237316907e-06, "loss": 17.5945, "step": 28465 }, { "epoch": 0.5203356060467582, "grad_norm": 6.077786951193615, "learning_rate": 4.9138464054261e-06, "loss": 17.3616, "step": 28466 }, { "epoch": 0.5203538852432047, "grad_norm": 6.812812721856576, "learning_rate": 4.913550437985089e-06, "loss": 17.8304, "step": 28467 }, { "epoch": 0.5203721644396513, "grad_norm": 6.2142037331805025, "learning_rate": 4.913254470847079e-06, "loss": 17.566, "step": 28468 }, { "epoch": 0.5203904436360978, "grad_norm": 7.039103235915634, "learning_rate": 4.912958504013104e-06, "loss": 17.8699, "step": 28469 }, { "epoch": 0.5204087228325442, "grad_norm": 7.681828737673268, "learning_rate": 4.9126625374842e-06, "loss": 18.2374, "step": 28470 }, { "epoch": 0.5204270020289908, "grad_norm": 13.307296266008041, "learning_rate": 4.912366571261408e-06, "loss": 17.6047, "step": 28471 }, { "epoch": 0.5204452812254373, "grad_norm": 7.231720035665516, "learning_rate": 4.912070605345764e-06, "loss": 18.2681, "step": 28472 }, { "epoch": 0.5204635604218839, "grad_norm": 6.602924443524286, "learning_rate": 4.911774639738303e-06, "loss": 17.5021, "step": 28473 }, { "epoch": 0.5204818396183304, "grad_norm": 7.883868020494509, "learning_rate": 4.911478674440066e-06, "loss": 18.2676, "step": 28474 }, { "epoch": 0.5205001188147769, "grad_norm": 6.159561162809547, "learning_rate": 4.911182709452086e-06, "loss": 17.2755, "step": 28475 }, { "epoch": 0.5205183980112235, "grad_norm": 5.514469339714249, "learning_rate": 4.910886744775405e-06, "loss": 17.2943, "step": 28476 }, { "epoch": 0.5205366772076699, "grad_norm": 6.324586802480088, "learning_rate": 4.910590780411058e-06, "loss": 17.3471, "step": 28477 }, { "epoch": 0.5205549564041164, "grad_norm": 6.5277273182511175, "learning_rate": 4.910294816360081e-06, "loss": 17.519, "step": 28478 }, { "epoch": 0.520573235600563, "grad_norm": 6.6522125621361115, "learning_rate": 4.909998852623516e-06, "loss": 17.6845, "step": 28479 }, { "epoch": 0.5205915147970095, "grad_norm": 5.550169158965691, "learning_rate": 4.9097028892023955e-06, "loss": 17.0065, "step": 28480 }, { "epoch": 0.5206097939934561, "grad_norm": 5.80747317245644, "learning_rate": 4.909406926097758e-06, "loss": 17.1229, "step": 28481 }, { "epoch": 0.5206280731899026, "grad_norm": 6.619435338361883, "learning_rate": 4.909110963310642e-06, "loss": 17.6486, "step": 28482 }, { "epoch": 0.520646352386349, "grad_norm": 7.424707776529631, "learning_rate": 4.908815000842085e-06, "loss": 17.9898, "step": 28483 }, { "epoch": 0.5206646315827956, "grad_norm": 6.1872392938948035, "learning_rate": 4.908519038693122e-06, "loss": 17.5199, "step": 28484 }, { "epoch": 0.5206829107792421, "grad_norm": 7.727224028795232, "learning_rate": 4.908223076864792e-06, "loss": 17.9094, "step": 28485 }, { "epoch": 0.5207011899756887, "grad_norm": 5.480193590507775, "learning_rate": 4.907927115358133e-06, "loss": 17.1923, "step": 28486 }, { "epoch": 0.5207194691721352, "grad_norm": 6.266386773671442, "learning_rate": 4.907631154174181e-06, "loss": 17.7293, "step": 28487 }, { "epoch": 0.5207377483685817, "grad_norm": 7.352862448510087, "learning_rate": 4.9073351933139744e-06, "loss": 17.8787, "step": 28488 }, { "epoch": 0.5207560275650283, "grad_norm": 6.142563812239842, "learning_rate": 4.9070392327785484e-06, "loss": 17.1949, "step": 28489 }, { "epoch": 0.5207743067614747, "grad_norm": 5.701055702495349, "learning_rate": 4.906743272568942e-06, "loss": 16.9904, "step": 28490 }, { "epoch": 0.5207925859579213, "grad_norm": 5.679934440740015, "learning_rate": 4.906447312686195e-06, "loss": 17.0599, "step": 28491 }, { "epoch": 0.5208108651543678, "grad_norm": 6.686150930820088, "learning_rate": 4.906151353131339e-06, "loss": 17.4449, "step": 28492 }, { "epoch": 0.5208291443508143, "grad_norm": 6.696901803619023, "learning_rate": 4.905855393905415e-06, "loss": 17.3549, "step": 28493 }, { "epoch": 0.5208474235472609, "grad_norm": 6.195181244263033, "learning_rate": 4.905559435009462e-06, "loss": 17.5175, "step": 28494 }, { "epoch": 0.5208657027437074, "grad_norm": 6.59450788418369, "learning_rate": 4.905263476444511e-06, "loss": 17.1538, "step": 28495 }, { "epoch": 0.520883981940154, "grad_norm": 7.408468682901097, "learning_rate": 4.904967518211607e-06, "loss": 17.9047, "step": 28496 }, { "epoch": 0.5209022611366004, "grad_norm": 6.412189614504112, "learning_rate": 4.904671560311782e-06, "loss": 17.4363, "step": 28497 }, { "epoch": 0.5209205403330469, "grad_norm": 6.7656715290992455, "learning_rate": 4.904375602746074e-06, "loss": 17.9125, "step": 28498 }, { "epoch": 0.5209388195294935, "grad_norm": 6.4924780465885945, "learning_rate": 4.904079645515523e-06, "loss": 17.2634, "step": 28499 }, { "epoch": 0.52095709872594, "grad_norm": 6.285596636750341, "learning_rate": 4.9037836886211645e-06, "loss": 17.5109, "step": 28500 }, { "epoch": 0.5209753779223866, "grad_norm": 6.7154283657399025, "learning_rate": 4.903487732064034e-06, "loss": 17.6013, "step": 28501 }, { "epoch": 0.520993657118833, "grad_norm": 6.548987268343913, "learning_rate": 4.903191775845171e-06, "loss": 17.78, "step": 28502 }, { "epoch": 0.5210119363152795, "grad_norm": 5.626496958756712, "learning_rate": 4.9028958199656145e-06, "loss": 17.1568, "step": 28503 }, { "epoch": 0.5210302155117261, "grad_norm": 6.430132484776041, "learning_rate": 4.902599864426397e-06, "loss": 17.5608, "step": 28504 }, { "epoch": 0.5210484947081726, "grad_norm": 6.821738962030881, "learning_rate": 4.902303909228561e-06, "loss": 17.402, "step": 28505 }, { "epoch": 0.5210667739046192, "grad_norm": 7.8250467364491145, "learning_rate": 4.902007954373139e-06, "loss": 18.2186, "step": 28506 }, { "epoch": 0.5210850531010657, "grad_norm": 7.19755452109373, "learning_rate": 4.901711999861172e-06, "loss": 18.0846, "step": 28507 }, { "epoch": 0.5211033322975122, "grad_norm": 8.471044242296147, "learning_rate": 4.901416045693697e-06, "loss": 18.6619, "step": 28508 }, { "epoch": 0.5211216114939587, "grad_norm": 7.159979382803887, "learning_rate": 4.901120091871747e-06, "loss": 18.2145, "step": 28509 }, { "epoch": 0.5211398906904052, "grad_norm": 8.182553049643214, "learning_rate": 4.9008241383963655e-06, "loss": 17.5352, "step": 28510 }, { "epoch": 0.5211581698868518, "grad_norm": 7.407998109603498, "learning_rate": 4.900528185268586e-06, "loss": 18.4521, "step": 28511 }, { "epoch": 0.5211764490832983, "grad_norm": 6.260601819204853, "learning_rate": 4.900232232489445e-06, "loss": 17.4589, "step": 28512 }, { "epoch": 0.5211947282797448, "grad_norm": 5.3650323782935825, "learning_rate": 4.899936280059983e-06, "loss": 17.1716, "step": 28513 }, { "epoch": 0.5212130074761914, "grad_norm": 5.452966579857079, "learning_rate": 4.899640327981237e-06, "loss": 17.1096, "step": 28514 }, { "epoch": 0.5212312866726379, "grad_norm": 6.8104556465352815, "learning_rate": 4.899344376254239e-06, "loss": 17.8224, "step": 28515 }, { "epoch": 0.5212495658690844, "grad_norm": 7.543379328766232, "learning_rate": 4.899048424880033e-06, "loss": 18.0693, "step": 28516 }, { "epoch": 0.5212678450655309, "grad_norm": 5.315562135630225, "learning_rate": 4.898752473859654e-06, "loss": 17.234, "step": 28517 }, { "epoch": 0.5212861242619774, "grad_norm": 7.1049195512248735, "learning_rate": 4.898456523194136e-06, "loss": 17.4867, "step": 28518 }, { "epoch": 0.521304403458424, "grad_norm": 7.272050415416022, "learning_rate": 4.898160572884522e-06, "loss": 17.7739, "step": 28519 }, { "epoch": 0.5213226826548705, "grad_norm": 8.401022379990874, "learning_rate": 4.897864622931845e-06, "loss": 18.3357, "step": 28520 }, { "epoch": 0.5213409618513171, "grad_norm": 8.378815935319116, "learning_rate": 4.897568673337143e-06, "loss": 17.6349, "step": 28521 }, { "epoch": 0.5213592410477635, "grad_norm": 7.1869115979672165, "learning_rate": 4.897272724101456e-06, "loss": 17.6035, "step": 28522 }, { "epoch": 0.52137752024421, "grad_norm": 6.674780567610482, "learning_rate": 4.8969767752258165e-06, "loss": 17.3377, "step": 28523 }, { "epoch": 0.5213957994406566, "grad_norm": 5.367316091649678, "learning_rate": 4.896680826711267e-06, "loss": 17.2292, "step": 28524 }, { "epoch": 0.5214140786371031, "grad_norm": 6.794283867337464, "learning_rate": 4.896384878558841e-06, "loss": 17.968, "step": 28525 }, { "epoch": 0.5214323578335497, "grad_norm": 6.77990135169107, "learning_rate": 4.896088930769576e-06, "loss": 17.4489, "step": 28526 }, { "epoch": 0.5214506370299962, "grad_norm": 8.045400185883338, "learning_rate": 4.895792983344512e-06, "loss": 17.8203, "step": 28527 }, { "epoch": 0.5214689162264426, "grad_norm": 4.902725783251568, "learning_rate": 4.895497036284685e-06, "loss": 16.9511, "step": 28528 }, { "epoch": 0.5214871954228892, "grad_norm": 5.70237133267022, "learning_rate": 4.89520108959113e-06, "loss": 17.1397, "step": 28529 }, { "epoch": 0.5215054746193357, "grad_norm": 6.144524863815836, "learning_rate": 4.894905143264887e-06, "loss": 17.3851, "step": 28530 }, { "epoch": 0.5215237538157823, "grad_norm": 5.899723755150293, "learning_rate": 4.8946091973069935e-06, "loss": 17.5855, "step": 28531 }, { "epoch": 0.5215420330122288, "grad_norm": 5.603984848018888, "learning_rate": 4.894313251718483e-06, "loss": 17.1923, "step": 28532 }, { "epoch": 0.5215603122086753, "grad_norm": 6.277892233137457, "learning_rate": 4.894017306500397e-06, "loss": 17.2843, "step": 28533 }, { "epoch": 0.5215785914051219, "grad_norm": 5.936613727806103, "learning_rate": 4.893721361653771e-06, "loss": 17.5003, "step": 28534 }, { "epoch": 0.5215968706015683, "grad_norm": 7.027533804174499, "learning_rate": 4.893425417179641e-06, "loss": 17.649, "step": 28535 }, { "epoch": 0.5216151497980149, "grad_norm": 6.102147359780212, "learning_rate": 4.893129473079048e-06, "loss": 17.1189, "step": 28536 }, { "epoch": 0.5216334289944614, "grad_norm": 6.998842791815334, "learning_rate": 4.892833529353025e-06, "loss": 17.8941, "step": 28537 }, { "epoch": 0.5216517081909079, "grad_norm": 5.774873116056154, "learning_rate": 4.892537586002613e-06, "loss": 17.2359, "step": 28538 }, { "epoch": 0.5216699873873545, "grad_norm": 5.992399578919574, "learning_rate": 4.8922416430288465e-06, "loss": 17.6229, "step": 28539 }, { "epoch": 0.521688266583801, "grad_norm": 5.756647339843333, "learning_rate": 4.891945700432762e-06, "loss": 17.437, "step": 28540 }, { "epoch": 0.5217065457802476, "grad_norm": 7.315163324355285, "learning_rate": 4.8916497582154015e-06, "loss": 17.5926, "step": 28541 }, { "epoch": 0.521724824976694, "grad_norm": 6.881487766683682, "learning_rate": 4.891353816377798e-06, "loss": 17.6835, "step": 28542 }, { "epoch": 0.5217431041731405, "grad_norm": 5.535896958764953, "learning_rate": 4.891057874920989e-06, "loss": 17.2321, "step": 28543 }, { "epoch": 0.5217613833695871, "grad_norm": 7.536160248991853, "learning_rate": 4.890761933846014e-06, "loss": 18.0044, "step": 28544 }, { "epoch": 0.5217796625660336, "grad_norm": 6.413927128110615, "learning_rate": 4.890465993153909e-06, "loss": 17.4872, "step": 28545 }, { "epoch": 0.5217979417624801, "grad_norm": 7.2266632095213605, "learning_rate": 4.8901700528457094e-06, "loss": 17.9273, "step": 28546 }, { "epoch": 0.5218162209589267, "grad_norm": 5.267232293352199, "learning_rate": 4.889874112922457e-06, "loss": 17.1158, "step": 28547 }, { "epoch": 0.5218345001553731, "grad_norm": 5.643571977378045, "learning_rate": 4.889578173385184e-06, "loss": 17.1832, "step": 28548 }, { "epoch": 0.5218527793518197, "grad_norm": 6.05130260401698, "learning_rate": 4.889282234234929e-06, "loss": 17.6666, "step": 28549 }, { "epoch": 0.5218710585482662, "grad_norm": 7.328747357004344, "learning_rate": 4.8889862954727325e-06, "loss": 17.2468, "step": 28550 }, { "epoch": 0.5218893377447127, "grad_norm": 5.400184968760882, "learning_rate": 4.888690357099628e-06, "loss": 17.0291, "step": 28551 }, { "epoch": 0.5219076169411593, "grad_norm": 7.448530732505496, "learning_rate": 4.888394419116656e-06, "loss": 18.0796, "step": 28552 }, { "epoch": 0.5219258961376058, "grad_norm": 7.59018770133678, "learning_rate": 4.88809848152485e-06, "loss": 18.0727, "step": 28553 }, { "epoch": 0.5219441753340524, "grad_norm": 8.839733041001011, "learning_rate": 4.887802544325249e-06, "loss": 18.1013, "step": 28554 }, { "epoch": 0.5219624545304988, "grad_norm": 5.991160633309579, "learning_rate": 4.887506607518892e-06, "loss": 17.3717, "step": 28555 }, { "epoch": 0.5219807337269453, "grad_norm": 5.253203731855516, "learning_rate": 4.887210671106814e-06, "loss": 16.9025, "step": 28556 }, { "epoch": 0.5219990129233919, "grad_norm": 6.76671894110189, "learning_rate": 4.886914735090053e-06, "loss": 17.3081, "step": 28557 }, { "epoch": 0.5220172921198384, "grad_norm": 7.6298661703180715, "learning_rate": 4.886618799469644e-06, "loss": 17.8484, "step": 28558 }, { "epoch": 0.522035571316285, "grad_norm": 7.2541321687389235, "learning_rate": 4.88632286424663e-06, "loss": 17.596, "step": 28559 }, { "epoch": 0.5220538505127315, "grad_norm": 6.760886726101878, "learning_rate": 4.886026929422041e-06, "loss": 17.3026, "step": 28560 }, { "epoch": 0.5220721297091779, "grad_norm": 5.676519567750497, "learning_rate": 4.885730994996919e-06, "loss": 17.1263, "step": 28561 }, { "epoch": 0.5220904089056245, "grad_norm": 7.21428551073307, "learning_rate": 4.8854350609723e-06, "loss": 17.8924, "step": 28562 }, { "epoch": 0.522108688102071, "grad_norm": 6.416671753889042, "learning_rate": 4.885139127349221e-06, "loss": 17.4339, "step": 28563 }, { "epoch": 0.5221269672985176, "grad_norm": 8.085624888739334, "learning_rate": 4.88484319412872e-06, "loss": 17.1029, "step": 28564 }, { "epoch": 0.5221452464949641, "grad_norm": 6.661496340731978, "learning_rate": 4.884547261311833e-06, "loss": 17.5654, "step": 28565 }, { "epoch": 0.5221635256914106, "grad_norm": 6.730843745277235, "learning_rate": 4.884251328899598e-06, "loss": 17.7507, "step": 28566 }, { "epoch": 0.5221818048878571, "grad_norm": 5.203887170605507, "learning_rate": 4.883955396893053e-06, "loss": 17.069, "step": 28567 }, { "epoch": 0.5222000840843036, "grad_norm": 5.702194749382778, "learning_rate": 4.883659465293231e-06, "loss": 17.133, "step": 28568 }, { "epoch": 0.5222183632807502, "grad_norm": 6.531802448548897, "learning_rate": 4.883363534101176e-06, "loss": 17.7237, "step": 28569 }, { "epoch": 0.5222366424771967, "grad_norm": 5.5281364685397865, "learning_rate": 4.8830676033179205e-06, "loss": 17.1396, "step": 28570 }, { "epoch": 0.5222549216736432, "grad_norm": 7.158311541678934, "learning_rate": 4.882771672944502e-06, "loss": 17.2343, "step": 28571 }, { "epoch": 0.5222732008700898, "grad_norm": 6.385483693936126, "learning_rate": 4.88247574298196e-06, "loss": 17.3761, "step": 28572 }, { "epoch": 0.5222914800665363, "grad_norm": 6.575685810924026, "learning_rate": 4.882179813431331e-06, "loss": 17.5746, "step": 28573 }, { "epoch": 0.5223097592629828, "grad_norm": 5.620502338011076, "learning_rate": 4.88188388429365e-06, "loss": 17.1279, "step": 28574 }, { "epoch": 0.5223280384594293, "grad_norm": 5.458200354751128, "learning_rate": 4.881587955569955e-06, "loss": 17.1399, "step": 28575 }, { "epoch": 0.5223463176558758, "grad_norm": 6.698885551026733, "learning_rate": 4.881292027261286e-06, "loss": 17.4983, "step": 28576 }, { "epoch": 0.5223645968523224, "grad_norm": 5.175300885480785, "learning_rate": 4.880996099368677e-06, "loss": 17.091, "step": 28577 }, { "epoch": 0.5223828760487689, "grad_norm": 5.456395267418592, "learning_rate": 4.880700171893167e-06, "loss": 16.9812, "step": 28578 }, { "epoch": 0.5224011552452155, "grad_norm": 6.832448428618826, "learning_rate": 4.880404244835792e-06, "loss": 17.8258, "step": 28579 }, { "epoch": 0.522419434441662, "grad_norm": 6.287832994819546, "learning_rate": 4.880108318197588e-06, "loss": 17.7479, "step": 28580 }, { "epoch": 0.5224377136381084, "grad_norm": 5.5577425560173435, "learning_rate": 4.879812391979598e-06, "loss": 17.223, "step": 28581 }, { "epoch": 0.522455992834555, "grad_norm": 6.121673541690735, "learning_rate": 4.8795164661828505e-06, "loss": 17.3787, "step": 28582 }, { "epoch": 0.5224742720310015, "grad_norm": 5.665615467954876, "learning_rate": 4.8792205408083915e-06, "loss": 17.0907, "step": 28583 }, { "epoch": 0.5224925512274481, "grad_norm": 8.032322676181629, "learning_rate": 4.878924615857252e-06, "loss": 18.0102, "step": 28584 }, { "epoch": 0.5225108304238946, "grad_norm": 6.827180904425209, "learning_rate": 4.87862869133047e-06, "loss": 17.6581, "step": 28585 }, { "epoch": 0.522529109620341, "grad_norm": 5.908271524091968, "learning_rate": 4.878332767229086e-06, "loss": 17.2425, "step": 28586 }, { "epoch": 0.5225473888167876, "grad_norm": 5.935408688879286, "learning_rate": 4.878036843554136e-06, "loss": 17.4134, "step": 28587 }, { "epoch": 0.5225656680132341, "grad_norm": 5.779912781779714, "learning_rate": 4.877740920306654e-06, "loss": 17.2493, "step": 28588 }, { "epoch": 0.5225839472096807, "grad_norm": 6.2411674179792795, "learning_rate": 4.87744499748768e-06, "loss": 17.5038, "step": 28589 }, { "epoch": 0.5226022264061272, "grad_norm": 6.749663808532885, "learning_rate": 4.877149075098251e-06, "loss": 17.7859, "step": 28590 }, { "epoch": 0.5226205056025737, "grad_norm": 6.399509929554015, "learning_rate": 4.8768531531394035e-06, "loss": 17.3995, "step": 28591 }, { "epoch": 0.5226387847990203, "grad_norm": 5.718918710110861, "learning_rate": 4.876557231612176e-06, "loss": 17.365, "step": 28592 }, { "epoch": 0.5226570639954667, "grad_norm": 6.330719956222392, "learning_rate": 4.876261310517604e-06, "loss": 17.404, "step": 28593 }, { "epoch": 0.5226753431919133, "grad_norm": 6.570372225377883, "learning_rate": 4.875965389856724e-06, "loss": 17.7979, "step": 28594 }, { "epoch": 0.5226936223883598, "grad_norm": 6.830668800842742, "learning_rate": 4.875669469630577e-06, "loss": 17.7618, "step": 28595 }, { "epoch": 0.5227119015848063, "grad_norm": 7.904864307882776, "learning_rate": 4.875373549840197e-06, "loss": 18.4144, "step": 28596 }, { "epoch": 0.5227301807812529, "grad_norm": 5.606781549605206, "learning_rate": 4.87507763048662e-06, "loss": 17.2982, "step": 28597 }, { "epoch": 0.5227484599776994, "grad_norm": 4.72959210553153, "learning_rate": 4.874781711570886e-06, "loss": 16.8386, "step": 28598 }, { "epoch": 0.522766739174146, "grad_norm": 6.458947639581319, "learning_rate": 4.874485793094031e-06, "loss": 17.894, "step": 28599 }, { "epoch": 0.5227850183705924, "grad_norm": 5.920819838995126, "learning_rate": 4.874189875057094e-06, "loss": 17.1354, "step": 28600 }, { "epoch": 0.5228032975670389, "grad_norm": 6.349242723956153, "learning_rate": 4.873893957461111e-06, "loss": 17.4207, "step": 28601 }, { "epoch": 0.5228215767634855, "grad_norm": 6.208539077079403, "learning_rate": 4.873598040307116e-06, "loss": 17.4685, "step": 28602 }, { "epoch": 0.522839855959932, "grad_norm": 7.317078508463828, "learning_rate": 4.87330212359615e-06, "loss": 17.7244, "step": 28603 }, { "epoch": 0.5228581351563786, "grad_norm": 6.723673178658345, "learning_rate": 4.873006207329251e-06, "loss": 17.4472, "step": 28604 }, { "epoch": 0.5228764143528251, "grad_norm": 6.0828055766578695, "learning_rate": 4.872710291507452e-06, "loss": 17.403, "step": 28605 }, { "epoch": 0.5228946935492715, "grad_norm": 5.220242569578839, "learning_rate": 4.872414376131793e-06, "loss": 17.1341, "step": 28606 }, { "epoch": 0.5229129727457181, "grad_norm": 5.79877143035889, "learning_rate": 4.872118461203311e-06, "loss": 17.2194, "step": 28607 }, { "epoch": 0.5229312519421646, "grad_norm": 6.908868440391969, "learning_rate": 4.871822546723041e-06, "loss": 17.7, "step": 28608 }, { "epoch": 0.5229495311386112, "grad_norm": 5.969050187671405, "learning_rate": 4.871526632692024e-06, "loss": 17.2985, "step": 28609 }, { "epoch": 0.5229678103350577, "grad_norm": 6.39895815571215, "learning_rate": 4.871230719111295e-06, "loss": 17.659, "step": 28610 }, { "epoch": 0.5229860895315042, "grad_norm": 6.329881816727737, "learning_rate": 4.870934805981889e-06, "loss": 17.436, "step": 28611 }, { "epoch": 0.5230043687279508, "grad_norm": 5.993809150442772, "learning_rate": 4.870638893304846e-06, "loss": 17.5049, "step": 28612 }, { "epoch": 0.5230226479243972, "grad_norm": 8.424532172633825, "learning_rate": 4.870342981081202e-06, "loss": 17.9168, "step": 28613 }, { "epoch": 0.5230409271208437, "grad_norm": 7.646587726971442, "learning_rate": 4.870047069311997e-06, "loss": 17.668, "step": 28614 }, { "epoch": 0.5230592063172903, "grad_norm": 5.686314147004758, "learning_rate": 4.869751157998264e-06, "loss": 17.1126, "step": 28615 }, { "epoch": 0.5230774855137368, "grad_norm": 6.036282928313144, "learning_rate": 4.8694552471410425e-06, "loss": 17.433, "step": 28616 }, { "epoch": 0.5230957647101834, "grad_norm": 6.994720058312239, "learning_rate": 4.869159336741369e-06, "loss": 17.9136, "step": 28617 }, { "epoch": 0.5231140439066299, "grad_norm": 5.934655269276345, "learning_rate": 4.868863426800281e-06, "loss": 17.4217, "step": 28618 }, { "epoch": 0.5231323231030763, "grad_norm": 5.382820492933522, "learning_rate": 4.868567517318813e-06, "loss": 17.0695, "step": 28619 }, { "epoch": 0.5231506022995229, "grad_norm": 6.584344094198239, "learning_rate": 4.8682716082980065e-06, "loss": 17.3006, "step": 28620 }, { "epoch": 0.5231688814959694, "grad_norm": 6.533655367891564, "learning_rate": 4.867975699738897e-06, "loss": 17.6225, "step": 28621 }, { "epoch": 0.523187160692416, "grad_norm": 5.519509156261628, "learning_rate": 4.8676797916425194e-06, "loss": 17.1227, "step": 28622 }, { "epoch": 0.5232054398888625, "grad_norm": 4.932508531538405, "learning_rate": 4.867383884009915e-06, "loss": 16.9027, "step": 28623 }, { "epoch": 0.523223719085309, "grad_norm": 7.522018420607516, "learning_rate": 4.8670879768421176e-06, "loss": 17.8257, "step": 28624 }, { "epoch": 0.5232419982817555, "grad_norm": 6.990985213410393, "learning_rate": 4.866792070140164e-06, "loss": 17.6341, "step": 28625 }, { "epoch": 0.523260277478202, "grad_norm": 6.369858785666854, "learning_rate": 4.866496163905094e-06, "loss": 17.4816, "step": 28626 }, { "epoch": 0.5232785566746486, "grad_norm": 7.279355469306909, "learning_rate": 4.866200258137944e-06, "loss": 17.7265, "step": 28627 }, { "epoch": 0.5232968358710951, "grad_norm": 6.3077888297320435, "learning_rate": 4.8659043528397484e-06, "loss": 17.2586, "step": 28628 }, { "epoch": 0.5233151150675416, "grad_norm": 6.007628964170743, "learning_rate": 4.865608448011547e-06, "loss": 17.5715, "step": 28629 }, { "epoch": 0.5233333942639882, "grad_norm": 7.651909580589494, "learning_rate": 4.865312543654376e-06, "loss": 17.7631, "step": 28630 }, { "epoch": 0.5233516734604347, "grad_norm": 5.473384197437844, "learning_rate": 4.865016639769275e-06, "loss": 17.071, "step": 28631 }, { "epoch": 0.5233699526568812, "grad_norm": 6.458577390802869, "learning_rate": 4.8647207363572785e-06, "loss": 17.3749, "step": 28632 }, { "epoch": 0.5233882318533277, "grad_norm": 4.945032555018459, "learning_rate": 4.864424833419422e-06, "loss": 17.1227, "step": 28633 }, { "epoch": 0.5234065110497742, "grad_norm": 7.19139568278267, "learning_rate": 4.864128930956746e-06, "loss": 17.7834, "step": 28634 }, { "epoch": 0.5234247902462208, "grad_norm": 5.468476556153672, "learning_rate": 4.863833028970287e-06, "loss": 16.9363, "step": 28635 }, { "epoch": 0.5234430694426673, "grad_norm": 5.719976840796176, "learning_rate": 4.86353712746108e-06, "loss": 17.2282, "step": 28636 }, { "epoch": 0.5234613486391139, "grad_norm": 5.927814075489786, "learning_rate": 4.8632412264301645e-06, "loss": 17.2721, "step": 28637 }, { "epoch": 0.5234796278355603, "grad_norm": 5.846840195343381, "learning_rate": 4.862945325878576e-06, "loss": 16.983, "step": 28638 }, { "epoch": 0.5234979070320068, "grad_norm": 6.206621822029001, "learning_rate": 4.862649425807352e-06, "loss": 17.364, "step": 28639 }, { "epoch": 0.5235161862284534, "grad_norm": 6.532029385335391, "learning_rate": 4.862353526217531e-06, "loss": 17.4888, "step": 28640 }, { "epoch": 0.5235344654248999, "grad_norm": 5.1130084455203635, "learning_rate": 4.86205762711015e-06, "loss": 17.0225, "step": 28641 }, { "epoch": 0.5235527446213465, "grad_norm": 6.754188500007389, "learning_rate": 4.861761728486242e-06, "loss": 17.8322, "step": 28642 }, { "epoch": 0.523571023817793, "grad_norm": 6.50366454538655, "learning_rate": 4.861465830346848e-06, "loss": 17.4779, "step": 28643 }, { "epoch": 0.5235893030142394, "grad_norm": 9.49622504661568, "learning_rate": 4.861169932693004e-06, "loss": 18.3106, "step": 28644 }, { "epoch": 0.523607582210686, "grad_norm": 5.73110752522359, "learning_rate": 4.8608740355257485e-06, "loss": 17.0343, "step": 28645 }, { "epoch": 0.5236258614071325, "grad_norm": 6.458243870159093, "learning_rate": 4.860578138846119e-06, "loss": 17.5289, "step": 28646 }, { "epoch": 0.5236441406035791, "grad_norm": 6.777740444972973, "learning_rate": 4.860282242655147e-06, "loss": 17.5169, "step": 28647 }, { "epoch": 0.5236624198000256, "grad_norm": 7.175666162289647, "learning_rate": 4.859986346953876e-06, "loss": 17.5742, "step": 28648 }, { "epoch": 0.5236806989964721, "grad_norm": 6.058726642855046, "learning_rate": 4.859690451743341e-06, "loss": 17.0269, "step": 28649 }, { "epoch": 0.5236989781929187, "grad_norm": 7.504244029549141, "learning_rate": 4.8593945570245776e-06, "loss": 17.9352, "step": 28650 }, { "epoch": 0.5237172573893651, "grad_norm": 5.674543523973336, "learning_rate": 4.859098662798625e-06, "loss": 17.101, "step": 28651 }, { "epoch": 0.5237355365858117, "grad_norm": 5.386776452218993, "learning_rate": 4.858802769066519e-06, "loss": 16.987, "step": 28652 }, { "epoch": 0.5237538157822582, "grad_norm": 5.170912022530876, "learning_rate": 4.858506875829297e-06, "loss": 16.9947, "step": 28653 }, { "epoch": 0.5237720949787047, "grad_norm": 6.497979812330839, "learning_rate": 4.858210983087997e-06, "loss": 17.4098, "step": 28654 }, { "epoch": 0.5237903741751513, "grad_norm": 5.396850166875037, "learning_rate": 4.857915090843655e-06, "loss": 16.8639, "step": 28655 }, { "epoch": 0.5238086533715978, "grad_norm": 8.161069659580226, "learning_rate": 4.857619199097307e-06, "loss": 18.3507, "step": 28656 }, { "epoch": 0.5238269325680444, "grad_norm": 7.427932251449674, "learning_rate": 4.857323307849992e-06, "loss": 18.1129, "step": 28657 }, { "epoch": 0.5238452117644908, "grad_norm": 7.23740741971263, "learning_rate": 4.857027417102744e-06, "loss": 17.7084, "step": 28658 }, { "epoch": 0.5238634909609373, "grad_norm": 6.949649628765428, "learning_rate": 4.856731526856607e-06, "loss": 17.7779, "step": 28659 }, { "epoch": 0.5238817701573839, "grad_norm": 6.939305662249475, "learning_rate": 4.856435637112612e-06, "loss": 17.4437, "step": 28660 }, { "epoch": 0.5239000493538304, "grad_norm": 8.040513767106534, "learning_rate": 4.856139747871796e-06, "loss": 17.8194, "step": 28661 }, { "epoch": 0.523918328550277, "grad_norm": 5.234498276910926, "learning_rate": 4.8558438591351984e-06, "loss": 16.9883, "step": 28662 }, { "epoch": 0.5239366077467235, "grad_norm": 5.6882015636197725, "learning_rate": 4.8555479709038575e-06, "loss": 17.1606, "step": 28663 }, { "epoch": 0.5239548869431699, "grad_norm": 5.740407646631591, "learning_rate": 4.855252083178806e-06, "loss": 17.3319, "step": 28664 }, { "epoch": 0.5239731661396165, "grad_norm": 5.718372727511673, "learning_rate": 4.854956195961085e-06, "loss": 16.933, "step": 28665 }, { "epoch": 0.523991445336063, "grad_norm": 6.6420807768311025, "learning_rate": 4.854660309251729e-06, "loss": 17.8021, "step": 28666 }, { "epoch": 0.5240097245325096, "grad_norm": 6.545572116505617, "learning_rate": 4.854364423051775e-06, "loss": 17.6041, "step": 28667 }, { "epoch": 0.5240280037289561, "grad_norm": 7.023776003366043, "learning_rate": 4.854068537362264e-06, "loss": 17.6512, "step": 28668 }, { "epoch": 0.5240462829254026, "grad_norm": 6.3335694738473, "learning_rate": 4.853772652184229e-06, "loss": 17.7214, "step": 28669 }, { "epoch": 0.5240645621218492, "grad_norm": 8.615002949894881, "learning_rate": 4.853476767518706e-06, "loss": 18.0568, "step": 28670 }, { "epoch": 0.5240828413182956, "grad_norm": 5.987409289706991, "learning_rate": 4.853180883366736e-06, "loss": 17.322, "step": 28671 }, { "epoch": 0.5241011205147422, "grad_norm": 14.72844723554283, "learning_rate": 4.8528849997293556e-06, "loss": 17.903, "step": 28672 }, { "epoch": 0.5241193997111887, "grad_norm": 7.502380063868678, "learning_rate": 4.852589116607597e-06, "loss": 17.6554, "step": 28673 }, { "epoch": 0.5241376789076352, "grad_norm": 5.156945823290601, "learning_rate": 4.852293234002505e-06, "loss": 16.9923, "step": 28674 }, { "epoch": 0.5241559581040818, "grad_norm": 6.0739783576531305, "learning_rate": 4.851997351915108e-06, "loss": 17.3553, "step": 28675 }, { "epoch": 0.5241742373005283, "grad_norm": 6.034195832265106, "learning_rate": 4.851701470346449e-06, "loss": 17.1783, "step": 28676 }, { "epoch": 0.5241925164969748, "grad_norm": 5.664087929063006, "learning_rate": 4.851405589297566e-06, "loss": 17.241, "step": 28677 }, { "epoch": 0.5242107956934213, "grad_norm": 6.075340909033961, "learning_rate": 4.85110970876949e-06, "loss": 17.2628, "step": 28678 }, { "epoch": 0.5242290748898678, "grad_norm": 6.765987722082619, "learning_rate": 4.850813828763264e-06, "loss": 17.7013, "step": 28679 }, { "epoch": 0.5242473540863144, "grad_norm": 5.71274771896953, "learning_rate": 4.850517949279922e-06, "loss": 17.2508, "step": 28680 }, { "epoch": 0.5242656332827609, "grad_norm": 6.519198357361352, "learning_rate": 4.8502220703205e-06, "loss": 17.5417, "step": 28681 }, { "epoch": 0.5242839124792074, "grad_norm": 6.3193823278258, "learning_rate": 4.849926191886039e-06, "loss": 17.4188, "step": 28682 }, { "epoch": 0.524302191675654, "grad_norm": 5.507761081687611, "learning_rate": 4.849630313977573e-06, "loss": 17.1608, "step": 28683 }, { "epoch": 0.5243204708721004, "grad_norm": 5.776666122232264, "learning_rate": 4.849334436596139e-06, "loss": 16.9994, "step": 28684 }, { "epoch": 0.524338750068547, "grad_norm": 7.7390542660983765, "learning_rate": 4.8490385597427745e-06, "loss": 18.1266, "step": 28685 }, { "epoch": 0.5243570292649935, "grad_norm": 6.413963591555267, "learning_rate": 4.848742683418519e-06, "loss": 17.3086, "step": 28686 }, { "epoch": 0.52437530846144, "grad_norm": 6.415781633774691, "learning_rate": 4.848446807624404e-06, "loss": 17.5619, "step": 28687 }, { "epoch": 0.5243935876578866, "grad_norm": 6.41271687883417, "learning_rate": 4.848150932361471e-06, "loss": 17.5939, "step": 28688 }, { "epoch": 0.524411866854333, "grad_norm": 6.594850199120288, "learning_rate": 4.847855057630756e-06, "loss": 17.723, "step": 28689 }, { "epoch": 0.5244301460507796, "grad_norm": 6.550198528890269, "learning_rate": 4.847559183433296e-06, "loss": 17.7492, "step": 28690 }, { "epoch": 0.5244484252472261, "grad_norm": 7.220340798436696, "learning_rate": 4.847263309770129e-06, "loss": 17.8352, "step": 28691 }, { "epoch": 0.5244667044436726, "grad_norm": 5.511075521973555, "learning_rate": 4.8469674366422885e-06, "loss": 17.3616, "step": 28692 }, { "epoch": 0.5244849836401192, "grad_norm": 6.07969437251479, "learning_rate": 4.846671564050815e-06, "loss": 17.2891, "step": 28693 }, { "epoch": 0.5245032628365657, "grad_norm": 6.277042181576031, "learning_rate": 4.846375691996745e-06, "loss": 17.4882, "step": 28694 }, { "epoch": 0.5245215420330123, "grad_norm": 8.152968328070358, "learning_rate": 4.846079820481113e-06, "loss": 17.9782, "step": 28695 }, { "epoch": 0.5245398212294587, "grad_norm": 6.791582980957194, "learning_rate": 4.845783949504961e-06, "loss": 17.5009, "step": 28696 }, { "epoch": 0.5245581004259052, "grad_norm": 6.489858402076207, "learning_rate": 4.84548807906932e-06, "loss": 17.6425, "step": 28697 }, { "epoch": 0.5245763796223518, "grad_norm": 6.854646747146825, "learning_rate": 4.84519220917523e-06, "loss": 17.766, "step": 28698 }, { "epoch": 0.5245946588187983, "grad_norm": 7.4948507071382275, "learning_rate": 4.844896339823731e-06, "loss": 17.9889, "step": 28699 }, { "epoch": 0.5246129380152449, "grad_norm": 7.274832398975454, "learning_rate": 4.844600471015855e-06, "loss": 17.7356, "step": 28700 }, { "epoch": 0.5246312172116914, "grad_norm": 7.592434054060814, "learning_rate": 4.8443046027526395e-06, "loss": 17.7565, "step": 28701 }, { "epoch": 0.5246494964081378, "grad_norm": 7.306628034976432, "learning_rate": 4.844008735035124e-06, "loss": 17.8487, "step": 28702 }, { "epoch": 0.5246677756045844, "grad_norm": 8.127512072239806, "learning_rate": 4.843712867864345e-06, "loss": 17.7852, "step": 28703 }, { "epoch": 0.5246860548010309, "grad_norm": 6.608577482738582, "learning_rate": 4.843417001241336e-06, "loss": 17.7183, "step": 28704 }, { "epoch": 0.5247043339974775, "grad_norm": 5.1392686140284765, "learning_rate": 4.84312113516714e-06, "loss": 16.972, "step": 28705 }, { "epoch": 0.524722613193924, "grad_norm": 5.622460930992014, "learning_rate": 4.8428252696427884e-06, "loss": 17.2805, "step": 28706 }, { "epoch": 0.5247408923903705, "grad_norm": 6.728131733473552, "learning_rate": 4.842529404669322e-06, "loss": 17.5892, "step": 28707 }, { "epoch": 0.5247591715868171, "grad_norm": 6.667787341536187, "learning_rate": 4.842233540247777e-06, "loss": 17.4749, "step": 28708 }, { "epoch": 0.5247774507832635, "grad_norm": 6.431972698734262, "learning_rate": 4.841937676379188e-06, "loss": 17.8282, "step": 28709 }, { "epoch": 0.5247957299797101, "grad_norm": 6.122433842841607, "learning_rate": 4.841641813064596e-06, "loss": 17.6201, "step": 28710 }, { "epoch": 0.5248140091761566, "grad_norm": 5.797297230345632, "learning_rate": 4.841345950305034e-06, "loss": 17.2922, "step": 28711 }, { "epoch": 0.5248322883726031, "grad_norm": 5.655518893695661, "learning_rate": 4.84105008810154e-06, "loss": 17.2077, "step": 28712 }, { "epoch": 0.5248505675690497, "grad_norm": 6.053768863837141, "learning_rate": 4.840754226455154e-06, "loss": 17.6577, "step": 28713 }, { "epoch": 0.5248688467654962, "grad_norm": 5.519281016861334, "learning_rate": 4.84045836536691e-06, "loss": 17.2577, "step": 28714 }, { "epoch": 0.5248871259619428, "grad_norm": 6.821674791896626, "learning_rate": 4.840162504837844e-06, "loss": 17.5838, "step": 28715 }, { "epoch": 0.5249054051583892, "grad_norm": 7.131856104619915, "learning_rate": 4.839866644868995e-06, "loss": 17.3524, "step": 28716 }, { "epoch": 0.5249236843548357, "grad_norm": 6.5408899826240345, "learning_rate": 4.839570785461401e-06, "loss": 17.4512, "step": 28717 }, { "epoch": 0.5249419635512823, "grad_norm": 5.77154805686396, "learning_rate": 4.839274926616096e-06, "loss": 17.1084, "step": 28718 }, { "epoch": 0.5249602427477288, "grad_norm": 5.1531795568693, "learning_rate": 4.838979068334119e-06, "loss": 17.1768, "step": 28719 }, { "epoch": 0.5249785219441754, "grad_norm": 5.985471990358824, "learning_rate": 4.838683210616505e-06, "loss": 17.449, "step": 28720 }, { "epoch": 0.5249968011406219, "grad_norm": 8.892860157466224, "learning_rate": 4.838387353464295e-06, "loss": 17.9445, "step": 28721 }, { "epoch": 0.5250150803370683, "grad_norm": 6.0620276417726435, "learning_rate": 4.838091496878522e-06, "loss": 17.66, "step": 28722 }, { "epoch": 0.5250333595335149, "grad_norm": 6.106682141346665, "learning_rate": 4.837795640860224e-06, "loss": 17.2064, "step": 28723 }, { "epoch": 0.5250516387299614, "grad_norm": 6.211250277119349, "learning_rate": 4.8374997854104385e-06, "loss": 17.2898, "step": 28724 }, { "epoch": 0.525069917926408, "grad_norm": 6.518622661517581, "learning_rate": 4.8372039305302025e-06, "loss": 17.5631, "step": 28725 }, { "epoch": 0.5250881971228545, "grad_norm": 6.1429629713118485, "learning_rate": 4.836908076220551e-06, "loss": 17.5944, "step": 28726 }, { "epoch": 0.525106476319301, "grad_norm": 6.793614605633182, "learning_rate": 4.836612222482524e-06, "loss": 17.6654, "step": 28727 }, { "epoch": 0.5251247555157476, "grad_norm": 5.079446359635824, "learning_rate": 4.836316369317158e-06, "loss": 16.9357, "step": 28728 }, { "epoch": 0.525143034712194, "grad_norm": 7.305594957389146, "learning_rate": 4.8360205167254865e-06, "loss": 17.7642, "step": 28729 }, { "epoch": 0.5251613139086406, "grad_norm": 7.259000192502282, "learning_rate": 4.83572466470855e-06, "loss": 18.3577, "step": 28730 }, { "epoch": 0.5251795931050871, "grad_norm": 5.5184028071688385, "learning_rate": 4.835428813267385e-06, "loss": 17.3325, "step": 28731 }, { "epoch": 0.5251978723015336, "grad_norm": 5.251727641357589, "learning_rate": 4.8351329624030255e-06, "loss": 16.9968, "step": 28732 }, { "epoch": 0.5252161514979802, "grad_norm": 5.448063703813754, "learning_rate": 4.834837112116514e-06, "loss": 17.137, "step": 28733 }, { "epoch": 0.5252344306944267, "grad_norm": 5.831568472431994, "learning_rate": 4.8345412624088814e-06, "loss": 17.2769, "step": 28734 }, { "epoch": 0.5252527098908732, "grad_norm": 4.8287999413733305, "learning_rate": 4.834245413281167e-06, "loss": 16.7295, "step": 28735 }, { "epoch": 0.5252709890873197, "grad_norm": 5.193900537815957, "learning_rate": 4.83394956473441e-06, "loss": 16.9692, "step": 28736 }, { "epoch": 0.5252892682837662, "grad_norm": 7.100448094310603, "learning_rate": 4.833653716769644e-06, "loss": 17.5512, "step": 28737 }, { "epoch": 0.5253075474802128, "grad_norm": 8.357750688280527, "learning_rate": 4.8333578693879095e-06, "loss": 17.7029, "step": 28738 }, { "epoch": 0.5253258266766593, "grad_norm": 6.653940056829294, "learning_rate": 4.833062022590239e-06, "loss": 17.5639, "step": 28739 }, { "epoch": 0.5253441058731059, "grad_norm": 5.77919097964156, "learning_rate": 4.832766176377671e-06, "loss": 17.1731, "step": 28740 }, { "epoch": 0.5253623850695524, "grad_norm": 6.792309666282276, "learning_rate": 4.832470330751245e-06, "loss": 17.4062, "step": 28741 }, { "epoch": 0.5253806642659988, "grad_norm": 5.637323340332414, "learning_rate": 4.832174485711995e-06, "loss": 17.2939, "step": 28742 }, { "epoch": 0.5253989434624454, "grad_norm": 5.088149111599136, "learning_rate": 4.831878641260959e-06, "loss": 17.0118, "step": 28743 }, { "epoch": 0.5254172226588919, "grad_norm": 6.665107883465812, "learning_rate": 4.831582797399173e-06, "loss": 17.8735, "step": 28744 }, { "epoch": 0.5254355018553385, "grad_norm": 5.934339297536312, "learning_rate": 4.831286954127677e-06, "loss": 17.3122, "step": 28745 }, { "epoch": 0.525453781051785, "grad_norm": 5.522877423594949, "learning_rate": 4.830991111447503e-06, "loss": 16.9918, "step": 28746 }, { "epoch": 0.5254720602482315, "grad_norm": 6.352460929737988, "learning_rate": 4.8306952693596916e-06, "loss": 17.5666, "step": 28747 }, { "epoch": 0.525490339444678, "grad_norm": 6.520708069835449, "learning_rate": 4.83039942786528e-06, "loss": 17.8456, "step": 28748 }, { "epoch": 0.5255086186411245, "grad_norm": 6.266852236870579, "learning_rate": 4.8301035869653006e-06, "loss": 17.4629, "step": 28749 }, { "epoch": 0.525526897837571, "grad_norm": 5.3717780443408, "learning_rate": 4.8298077466607965e-06, "loss": 17.0446, "step": 28750 }, { "epoch": 0.5255451770340176, "grad_norm": 8.064369400960787, "learning_rate": 4.8295119069527984e-06, "loss": 18.0764, "step": 28751 }, { "epoch": 0.5255634562304641, "grad_norm": 5.994749865910325, "learning_rate": 4.829216067842347e-06, "loss": 17.3672, "step": 28752 }, { "epoch": 0.5255817354269107, "grad_norm": 5.7969456856365715, "learning_rate": 4.828920229330482e-06, "loss": 17.4399, "step": 28753 }, { "epoch": 0.5256000146233571, "grad_norm": 7.31493624844024, "learning_rate": 4.8286243914182326e-06, "loss": 18.06, "step": 28754 }, { "epoch": 0.5256182938198036, "grad_norm": 5.753582751787242, "learning_rate": 4.828328554106642e-06, "loss": 17.3053, "step": 28755 }, { "epoch": 0.5256365730162502, "grad_norm": 6.150802565740242, "learning_rate": 4.828032717396744e-06, "loss": 17.4962, "step": 28756 }, { "epoch": 0.5256548522126967, "grad_norm": 6.206272552471612, "learning_rate": 4.827736881289575e-06, "loss": 17.394, "step": 28757 }, { "epoch": 0.5256731314091433, "grad_norm": 6.901267374559965, "learning_rate": 4.8274410457861764e-06, "loss": 17.513, "step": 28758 }, { "epoch": 0.5256914106055898, "grad_norm": 5.536056620931555, "learning_rate": 4.8271452108875815e-06, "loss": 17.0095, "step": 28759 }, { "epoch": 0.5257096898020363, "grad_norm": 4.967540766595819, "learning_rate": 4.826849376594825e-06, "loss": 16.812, "step": 28760 }, { "epoch": 0.5257279689984828, "grad_norm": 5.979739505266952, "learning_rate": 4.826553542908948e-06, "loss": 17.1347, "step": 28761 }, { "epoch": 0.5257462481949293, "grad_norm": 5.699134976416894, "learning_rate": 4.826257709830987e-06, "loss": 17.4132, "step": 28762 }, { "epoch": 0.5257645273913759, "grad_norm": 5.727878449317979, "learning_rate": 4.825961877361975e-06, "loss": 17.2631, "step": 28763 }, { "epoch": 0.5257828065878224, "grad_norm": 7.249990447239445, "learning_rate": 4.825666045502954e-06, "loss": 17.3154, "step": 28764 }, { "epoch": 0.5258010857842689, "grad_norm": 5.949054223126825, "learning_rate": 4.825370214254958e-06, "loss": 17.3387, "step": 28765 }, { "epoch": 0.5258193649807155, "grad_norm": 6.098863907929176, "learning_rate": 4.825074383619022e-06, "loss": 17.4397, "step": 28766 }, { "epoch": 0.525837644177162, "grad_norm": 6.686424863194278, "learning_rate": 4.824778553596188e-06, "loss": 17.3564, "step": 28767 }, { "epoch": 0.5258559233736085, "grad_norm": 5.192021215463849, "learning_rate": 4.824482724187488e-06, "loss": 17.0585, "step": 28768 }, { "epoch": 0.525874202570055, "grad_norm": 5.950977882136102, "learning_rate": 4.8241868953939626e-06, "loss": 17.1144, "step": 28769 }, { "epoch": 0.5258924817665015, "grad_norm": 5.34094818584719, "learning_rate": 4.823891067216645e-06, "loss": 17.1881, "step": 28770 }, { "epoch": 0.5259107609629481, "grad_norm": 6.856814190634476, "learning_rate": 4.823595239656573e-06, "loss": 17.6609, "step": 28771 }, { "epoch": 0.5259290401593946, "grad_norm": 5.639556277583762, "learning_rate": 4.823299412714788e-06, "loss": 17.0494, "step": 28772 }, { "epoch": 0.5259473193558412, "grad_norm": 6.296680540643699, "learning_rate": 4.823003586392322e-06, "loss": 17.4254, "step": 28773 }, { "epoch": 0.5259655985522876, "grad_norm": 6.73354810613954, "learning_rate": 4.822707760690211e-06, "loss": 17.8162, "step": 28774 }, { "epoch": 0.5259838777487341, "grad_norm": 6.5525951676966345, "learning_rate": 4.822411935609496e-06, "loss": 17.1992, "step": 28775 }, { "epoch": 0.5260021569451807, "grad_norm": 8.408037551361863, "learning_rate": 4.822116111151212e-06, "loss": 18.426, "step": 28776 }, { "epoch": 0.5260204361416272, "grad_norm": 6.5854702943143675, "learning_rate": 4.821820287316394e-06, "loss": 17.6198, "step": 28777 }, { "epoch": 0.5260387153380738, "grad_norm": 6.318287058444489, "learning_rate": 4.821524464106082e-06, "loss": 17.3501, "step": 28778 }, { "epoch": 0.5260569945345203, "grad_norm": 5.133348364285311, "learning_rate": 4.8212286415213095e-06, "loss": 17.1092, "step": 28779 }, { "epoch": 0.5260752737309667, "grad_norm": 7.3323036109316515, "learning_rate": 4.820932819563115e-06, "loss": 17.8787, "step": 28780 }, { "epoch": 0.5260935529274133, "grad_norm": 7.388183691942203, "learning_rate": 4.8206369982325375e-06, "loss": 17.8646, "step": 28781 }, { "epoch": 0.5261118321238598, "grad_norm": 5.480323312940068, "learning_rate": 4.820341177530609e-06, "loss": 17.3548, "step": 28782 }, { "epoch": 0.5261301113203064, "grad_norm": 7.074633615873922, "learning_rate": 4.820045357458372e-06, "loss": 17.7369, "step": 28783 }, { "epoch": 0.5261483905167529, "grad_norm": 7.247532050009481, "learning_rate": 4.819749538016859e-06, "loss": 18.0397, "step": 28784 }, { "epoch": 0.5261666697131994, "grad_norm": 6.678261021602981, "learning_rate": 4.819453719207107e-06, "loss": 17.4542, "step": 28785 }, { "epoch": 0.526184948909646, "grad_norm": 5.309826273261194, "learning_rate": 4.819157901030156e-06, "loss": 17.1165, "step": 28786 }, { "epoch": 0.5262032281060924, "grad_norm": 7.008534551252991, "learning_rate": 4.818862083487042e-06, "loss": 18.041, "step": 28787 }, { "epoch": 0.526221507302539, "grad_norm": 5.676848863987564, "learning_rate": 4.818566266578797e-06, "loss": 17.2307, "step": 28788 }, { "epoch": 0.5262397864989855, "grad_norm": 6.501034094267944, "learning_rate": 4.818270450306464e-06, "loss": 17.7014, "step": 28789 }, { "epoch": 0.526258065695432, "grad_norm": 5.961937982620032, "learning_rate": 4.8179746346710775e-06, "loss": 17.4626, "step": 28790 }, { "epoch": 0.5262763448918786, "grad_norm": 5.322464779133777, "learning_rate": 4.817678819673672e-06, "loss": 17.1798, "step": 28791 }, { "epoch": 0.5262946240883251, "grad_norm": 5.726952271737116, "learning_rate": 4.817383005315289e-06, "loss": 17.2633, "step": 28792 }, { "epoch": 0.5263129032847716, "grad_norm": 7.5566700966965765, "learning_rate": 4.8170871915969615e-06, "loss": 18.2115, "step": 28793 }, { "epoch": 0.5263311824812181, "grad_norm": 7.582494015258839, "learning_rate": 4.816791378519726e-06, "loss": 18.143, "step": 28794 }, { "epoch": 0.5263494616776646, "grad_norm": 5.774045152665932, "learning_rate": 4.8164955660846234e-06, "loss": 17.3002, "step": 28795 }, { "epoch": 0.5263677408741112, "grad_norm": 6.097955632273698, "learning_rate": 4.816199754292688e-06, "loss": 17.2827, "step": 28796 }, { "epoch": 0.5263860200705577, "grad_norm": 6.6630289248265795, "learning_rate": 4.815903943144955e-06, "loss": 17.2041, "step": 28797 }, { "epoch": 0.5264042992670043, "grad_norm": 5.932300072459454, "learning_rate": 4.815608132642462e-06, "loss": 17.4194, "step": 28798 }, { "epoch": 0.5264225784634508, "grad_norm": 4.999133806791074, "learning_rate": 4.815312322786247e-06, "loss": 17.0654, "step": 28799 }, { "epoch": 0.5264408576598972, "grad_norm": 6.751328673344176, "learning_rate": 4.8150165135773475e-06, "loss": 17.9876, "step": 28800 }, { "epoch": 0.5264591368563438, "grad_norm": 6.192437654670182, "learning_rate": 4.814720705016799e-06, "loss": 17.6106, "step": 28801 }, { "epoch": 0.5264774160527903, "grad_norm": 7.794274628215912, "learning_rate": 4.814424897105638e-06, "loss": 17.8192, "step": 28802 }, { "epoch": 0.5264956952492369, "grad_norm": 5.323126925697858, "learning_rate": 4.8141290898449e-06, "loss": 17.3072, "step": 28803 }, { "epoch": 0.5265139744456834, "grad_norm": 7.398138475483236, "learning_rate": 4.813833283235626e-06, "loss": 18.1937, "step": 28804 }, { "epoch": 0.5265322536421299, "grad_norm": 6.105252246422766, "learning_rate": 4.8135374772788475e-06, "loss": 17.2926, "step": 28805 }, { "epoch": 0.5265505328385764, "grad_norm": 6.645956131776079, "learning_rate": 4.813241671975607e-06, "loss": 17.6453, "step": 28806 }, { "epoch": 0.5265688120350229, "grad_norm": 5.998042468674564, "learning_rate": 4.812945867326937e-06, "loss": 17.3839, "step": 28807 }, { "epoch": 0.5265870912314695, "grad_norm": 5.410285365863696, "learning_rate": 4.812650063333874e-06, "loss": 17.1879, "step": 28808 }, { "epoch": 0.526605370427916, "grad_norm": 6.562702465446166, "learning_rate": 4.8123542599974584e-06, "loss": 17.4391, "step": 28809 }, { "epoch": 0.5266236496243625, "grad_norm": 7.70544683800876, "learning_rate": 4.812058457318724e-06, "loss": 18.02, "step": 28810 }, { "epoch": 0.5266419288208091, "grad_norm": 6.833309188303262, "learning_rate": 4.811762655298707e-06, "loss": 17.8452, "step": 28811 }, { "epoch": 0.5266602080172555, "grad_norm": 5.996346710112719, "learning_rate": 4.811466853938448e-06, "loss": 17.3942, "step": 28812 }, { "epoch": 0.5266784872137021, "grad_norm": 7.732941784560871, "learning_rate": 4.811171053238978e-06, "loss": 17.989, "step": 28813 }, { "epoch": 0.5266967664101486, "grad_norm": 6.3369818548016354, "learning_rate": 4.8108752532013405e-06, "loss": 17.3566, "step": 28814 }, { "epoch": 0.5267150456065951, "grad_norm": 7.1037375989936455, "learning_rate": 4.810579453826568e-06, "loss": 17.7607, "step": 28815 }, { "epoch": 0.5267333248030417, "grad_norm": 5.608068240597165, "learning_rate": 4.810283655115697e-06, "loss": 17.1247, "step": 28816 }, { "epoch": 0.5267516039994882, "grad_norm": 5.750561928620749, "learning_rate": 4.809987857069766e-06, "loss": 17.0579, "step": 28817 }, { "epoch": 0.5267698831959347, "grad_norm": 7.964696187803064, "learning_rate": 4.809692059689813e-06, "loss": 18.233, "step": 28818 }, { "epoch": 0.5267881623923812, "grad_norm": 5.694170250881407, "learning_rate": 4.809396262976869e-06, "loss": 17.548, "step": 28819 }, { "epoch": 0.5268064415888277, "grad_norm": 5.420672596971406, "learning_rate": 4.809100466931976e-06, "loss": 17.0734, "step": 28820 }, { "epoch": 0.5268247207852743, "grad_norm": 6.091166944828002, "learning_rate": 4.808804671556171e-06, "loss": 17.3475, "step": 28821 }, { "epoch": 0.5268429999817208, "grad_norm": 7.1580787462604, "learning_rate": 4.8085088768504865e-06, "loss": 17.9794, "step": 28822 }, { "epoch": 0.5268612791781673, "grad_norm": 6.468301608303433, "learning_rate": 4.808213082815964e-06, "loss": 17.3555, "step": 28823 }, { "epoch": 0.5268795583746139, "grad_norm": 6.112134604455029, "learning_rate": 4.807917289453637e-06, "loss": 17.3798, "step": 28824 }, { "epoch": 0.5268978375710603, "grad_norm": 5.614336961777815, "learning_rate": 4.807621496764542e-06, "loss": 17.2696, "step": 28825 }, { "epoch": 0.5269161167675069, "grad_norm": 5.986367120200508, "learning_rate": 4.807325704749719e-06, "loss": 17.4735, "step": 28826 }, { "epoch": 0.5269343959639534, "grad_norm": 5.879731673740824, "learning_rate": 4.8070299134102006e-06, "loss": 17.4008, "step": 28827 }, { "epoch": 0.5269526751603999, "grad_norm": 5.533548854367254, "learning_rate": 4.806734122747028e-06, "loss": 17.1912, "step": 28828 }, { "epoch": 0.5269709543568465, "grad_norm": 7.511973988798581, "learning_rate": 4.806438332761234e-06, "loss": 17.5786, "step": 28829 }, { "epoch": 0.526989233553293, "grad_norm": 6.706040907550385, "learning_rate": 4.806142543453857e-06, "loss": 17.6139, "step": 28830 }, { "epoch": 0.5270075127497396, "grad_norm": 7.714620301900427, "learning_rate": 4.805846754825934e-06, "loss": 18.1718, "step": 28831 }, { "epoch": 0.527025791946186, "grad_norm": 6.39458512092261, "learning_rate": 4.805550966878502e-06, "loss": 17.6294, "step": 28832 }, { "epoch": 0.5270440711426325, "grad_norm": 6.84977405622963, "learning_rate": 4.805255179612595e-06, "loss": 17.7036, "step": 28833 }, { "epoch": 0.5270623503390791, "grad_norm": 5.206935118393221, "learning_rate": 4.804959393029253e-06, "loss": 17.0615, "step": 28834 }, { "epoch": 0.5270806295355256, "grad_norm": 6.176374157997757, "learning_rate": 4.804663607129512e-06, "loss": 17.5081, "step": 28835 }, { "epoch": 0.5270989087319722, "grad_norm": 7.04104096253257, "learning_rate": 4.804367821914406e-06, "loss": 17.7588, "step": 28836 }, { "epoch": 0.5271171879284187, "grad_norm": 6.64607566359931, "learning_rate": 4.804072037384976e-06, "loss": 17.6263, "step": 28837 }, { "epoch": 0.5271354671248651, "grad_norm": 6.695391060144659, "learning_rate": 4.803776253542256e-06, "loss": 17.929, "step": 28838 }, { "epoch": 0.5271537463213117, "grad_norm": 5.7556417819184755, "learning_rate": 4.803480470387282e-06, "loss": 17.2578, "step": 28839 }, { "epoch": 0.5271720255177582, "grad_norm": 4.93780507451977, "learning_rate": 4.803184687921093e-06, "loss": 16.9088, "step": 28840 }, { "epoch": 0.5271903047142048, "grad_norm": 6.562483807808549, "learning_rate": 4.802888906144726e-06, "loss": 17.7105, "step": 28841 }, { "epoch": 0.5272085839106513, "grad_norm": 5.767657012647988, "learning_rate": 4.8025931250592135e-06, "loss": 17.2964, "step": 28842 }, { "epoch": 0.5272268631070978, "grad_norm": 5.772017979605431, "learning_rate": 4.802297344665595e-06, "loss": 17.3725, "step": 28843 }, { "epoch": 0.5272451423035444, "grad_norm": 6.119249020066546, "learning_rate": 4.802001564964908e-06, "loss": 17.3731, "step": 28844 }, { "epoch": 0.5272634214999908, "grad_norm": 6.198138569624086, "learning_rate": 4.801705785958189e-06, "loss": 17.6388, "step": 28845 }, { "epoch": 0.5272817006964374, "grad_norm": 7.0646354782869265, "learning_rate": 4.801410007646475e-06, "loss": 17.3207, "step": 28846 }, { "epoch": 0.5272999798928839, "grad_norm": 5.609250364045369, "learning_rate": 4.801114230030799e-06, "loss": 17.3197, "step": 28847 }, { "epoch": 0.5273182590893304, "grad_norm": 5.920393323699768, "learning_rate": 4.800818453112201e-06, "loss": 17.3523, "step": 28848 }, { "epoch": 0.527336538285777, "grad_norm": 6.555831430885845, "learning_rate": 4.800522676891719e-06, "loss": 17.5858, "step": 28849 }, { "epoch": 0.5273548174822235, "grad_norm": 6.300141261394385, "learning_rate": 4.800226901370385e-06, "loss": 17.531, "step": 28850 }, { "epoch": 0.52737309667867, "grad_norm": 6.621617771442162, "learning_rate": 4.799931126549241e-06, "loss": 17.3024, "step": 28851 }, { "epoch": 0.5273913758751165, "grad_norm": 6.508352025331976, "learning_rate": 4.79963535242932e-06, "loss": 17.7978, "step": 28852 }, { "epoch": 0.527409655071563, "grad_norm": 6.263305196475308, "learning_rate": 4.799339579011658e-06, "loss": 17.3442, "step": 28853 }, { "epoch": 0.5274279342680096, "grad_norm": 5.870789556753149, "learning_rate": 4.799043806297296e-06, "loss": 17.2287, "step": 28854 }, { "epoch": 0.5274462134644561, "grad_norm": 7.02265397337617, "learning_rate": 4.798748034287268e-06, "loss": 17.6731, "step": 28855 }, { "epoch": 0.5274644926609027, "grad_norm": 5.772823389684692, "learning_rate": 4.798452262982608e-06, "loss": 16.9305, "step": 28856 }, { "epoch": 0.5274827718573492, "grad_norm": 5.5524350645130145, "learning_rate": 4.7981564923843575e-06, "loss": 17.316, "step": 28857 }, { "epoch": 0.5275010510537956, "grad_norm": 5.601942665543992, "learning_rate": 4.797860722493549e-06, "loss": 17.0819, "step": 28858 }, { "epoch": 0.5275193302502422, "grad_norm": 6.174825587999008, "learning_rate": 4.797564953311223e-06, "loss": 17.2222, "step": 28859 }, { "epoch": 0.5275376094466887, "grad_norm": 7.139356368902403, "learning_rate": 4.797269184838415e-06, "loss": 17.5805, "step": 28860 }, { "epoch": 0.5275558886431353, "grad_norm": 7.096687345433783, "learning_rate": 4.796973417076158e-06, "loss": 17.9524, "step": 28861 }, { "epoch": 0.5275741678395818, "grad_norm": 6.824117822901115, "learning_rate": 4.796677650025493e-06, "loss": 17.9426, "step": 28862 }, { "epoch": 0.5275924470360283, "grad_norm": 4.971978942798934, "learning_rate": 4.796381883687457e-06, "loss": 17.0492, "step": 28863 }, { "epoch": 0.5276107262324748, "grad_norm": 7.267242743867442, "learning_rate": 4.7960861180630815e-06, "loss": 17.8284, "step": 28864 }, { "epoch": 0.5276290054289213, "grad_norm": 7.140995282182595, "learning_rate": 4.7957903531534095e-06, "loss": 17.4981, "step": 28865 }, { "epoch": 0.5276472846253679, "grad_norm": 6.044278588423279, "learning_rate": 4.7954945889594735e-06, "loss": 17.3575, "step": 28866 }, { "epoch": 0.5276655638218144, "grad_norm": 6.165296503275143, "learning_rate": 4.79519882548231e-06, "loss": 17.7388, "step": 28867 }, { "epoch": 0.5276838430182609, "grad_norm": 7.251876738000928, "learning_rate": 4.794903062722959e-06, "loss": 17.8625, "step": 28868 }, { "epoch": 0.5277021222147075, "grad_norm": 5.316511572719626, "learning_rate": 4.794607300682453e-06, "loss": 17.2733, "step": 28869 }, { "epoch": 0.527720401411154, "grad_norm": 4.96127738508365, "learning_rate": 4.794311539361832e-06, "loss": 16.9464, "step": 28870 }, { "epoch": 0.5277386806076005, "grad_norm": 6.128694443346111, "learning_rate": 4.79401577876213e-06, "loss": 17.5274, "step": 28871 }, { "epoch": 0.527756959804047, "grad_norm": 6.106806971745361, "learning_rate": 4.793720018884387e-06, "loss": 17.3225, "step": 28872 }, { "epoch": 0.5277752390004935, "grad_norm": 7.468414650130117, "learning_rate": 4.793424259729634e-06, "loss": 17.8283, "step": 28873 }, { "epoch": 0.5277935181969401, "grad_norm": 7.132019563148453, "learning_rate": 4.7931285012989135e-06, "loss": 17.6442, "step": 28874 }, { "epoch": 0.5278117973933866, "grad_norm": 5.131383202438949, "learning_rate": 4.7928327435932584e-06, "loss": 17.0155, "step": 28875 }, { "epoch": 0.5278300765898332, "grad_norm": 5.292756276479835, "learning_rate": 4.792536986613707e-06, "loss": 17.2849, "step": 28876 }, { "epoch": 0.5278483557862796, "grad_norm": 6.986892692955527, "learning_rate": 4.792241230361297e-06, "loss": 17.7004, "step": 28877 }, { "epoch": 0.5278666349827261, "grad_norm": 6.412828363572758, "learning_rate": 4.791945474837061e-06, "loss": 17.4445, "step": 28878 }, { "epoch": 0.5278849141791727, "grad_norm": 7.324417043389137, "learning_rate": 4.791649720042039e-06, "loss": 17.9936, "step": 28879 }, { "epoch": 0.5279031933756192, "grad_norm": 5.866238112140212, "learning_rate": 4.791353965977268e-06, "loss": 17.3683, "step": 28880 }, { "epoch": 0.5279214725720658, "grad_norm": 6.977715905323436, "learning_rate": 4.791058212643781e-06, "loss": 17.33, "step": 28881 }, { "epoch": 0.5279397517685123, "grad_norm": 7.285617705414452, "learning_rate": 4.790762460042619e-06, "loss": 18.2784, "step": 28882 }, { "epoch": 0.5279580309649587, "grad_norm": 6.124274183913281, "learning_rate": 4.790466708174815e-06, "loss": 17.406, "step": 28883 }, { "epoch": 0.5279763101614053, "grad_norm": 5.342439893740732, "learning_rate": 4.790170957041406e-06, "loss": 16.9759, "step": 28884 }, { "epoch": 0.5279945893578518, "grad_norm": 6.44921547563559, "learning_rate": 4.789875206643432e-06, "loss": 17.5331, "step": 28885 }, { "epoch": 0.5280128685542983, "grad_norm": 6.408641977064421, "learning_rate": 4.789579456981927e-06, "loss": 17.7002, "step": 28886 }, { "epoch": 0.5280311477507449, "grad_norm": 7.119874164463317, "learning_rate": 4.789283708057926e-06, "loss": 18.0914, "step": 28887 }, { "epoch": 0.5280494269471914, "grad_norm": 6.315073171129628, "learning_rate": 4.788987959872468e-06, "loss": 17.3811, "step": 28888 }, { "epoch": 0.528067706143638, "grad_norm": 5.316694993406047, "learning_rate": 4.7886922124265875e-06, "loss": 17.1931, "step": 28889 }, { "epoch": 0.5280859853400844, "grad_norm": 6.151973430956692, "learning_rate": 4.788396465721326e-06, "loss": 17.3438, "step": 28890 }, { "epoch": 0.5281042645365309, "grad_norm": 5.531067691821519, "learning_rate": 4.788100719757715e-06, "loss": 17.2357, "step": 28891 }, { "epoch": 0.5281225437329775, "grad_norm": 8.132472525597233, "learning_rate": 4.787804974536791e-06, "loss": 18.3136, "step": 28892 }, { "epoch": 0.528140822929424, "grad_norm": 6.2304121069382274, "learning_rate": 4.787509230059593e-06, "loss": 17.4463, "step": 28893 }, { "epoch": 0.5281591021258706, "grad_norm": 5.938465286065155, "learning_rate": 4.787213486327158e-06, "loss": 17.3088, "step": 28894 }, { "epoch": 0.5281773813223171, "grad_norm": 6.935893332228987, "learning_rate": 4.786917743340519e-06, "loss": 17.7446, "step": 28895 }, { "epoch": 0.5281956605187635, "grad_norm": 7.08577763734086, "learning_rate": 4.786622001100718e-06, "loss": 17.3099, "step": 28896 }, { "epoch": 0.5282139397152101, "grad_norm": 7.63743022798368, "learning_rate": 4.786326259608785e-06, "loss": 17.802, "step": 28897 }, { "epoch": 0.5282322189116566, "grad_norm": 6.003127141033391, "learning_rate": 4.78603051886576e-06, "loss": 17.5072, "step": 28898 }, { "epoch": 0.5282504981081032, "grad_norm": 6.017089453144707, "learning_rate": 4.785734778872682e-06, "loss": 17.4595, "step": 28899 }, { "epoch": 0.5282687773045497, "grad_norm": 5.361848091579653, "learning_rate": 4.785439039630585e-06, "loss": 17.0804, "step": 28900 }, { "epoch": 0.5282870565009962, "grad_norm": 6.2754717078019375, "learning_rate": 4.785143301140504e-06, "loss": 17.4889, "step": 28901 }, { "epoch": 0.5283053356974428, "grad_norm": 6.365149984308117, "learning_rate": 4.784847563403477e-06, "loss": 17.3507, "step": 28902 }, { "epoch": 0.5283236148938892, "grad_norm": 6.818204922468601, "learning_rate": 4.784551826420542e-06, "loss": 17.5514, "step": 28903 }, { "epoch": 0.5283418940903358, "grad_norm": 5.779721557854343, "learning_rate": 4.784256090192732e-06, "loss": 17.3461, "step": 28904 }, { "epoch": 0.5283601732867823, "grad_norm": 6.774582861024068, "learning_rate": 4.783960354721089e-06, "loss": 17.8016, "step": 28905 }, { "epoch": 0.5283784524832288, "grad_norm": 6.594676569936102, "learning_rate": 4.783664620006642e-06, "loss": 17.6608, "step": 28906 }, { "epoch": 0.5283967316796754, "grad_norm": 6.839665753232639, "learning_rate": 4.783368886050434e-06, "loss": 17.4051, "step": 28907 }, { "epoch": 0.5284150108761219, "grad_norm": 5.488721248285436, "learning_rate": 4.7830731528535e-06, "loss": 17.0675, "step": 28908 }, { "epoch": 0.5284332900725685, "grad_norm": 7.588622775282102, "learning_rate": 4.782777420416874e-06, "loss": 17.9841, "step": 28909 }, { "epoch": 0.5284515692690149, "grad_norm": 5.7470798935263385, "learning_rate": 4.782481688741596e-06, "loss": 17.0947, "step": 28910 }, { "epoch": 0.5284698484654614, "grad_norm": 5.238533635509501, "learning_rate": 4.7821859578287e-06, "loss": 17.1855, "step": 28911 }, { "epoch": 0.528488127661908, "grad_norm": 5.2659604937854985, "learning_rate": 4.781890227679222e-06, "loss": 17.0248, "step": 28912 }, { "epoch": 0.5285064068583545, "grad_norm": 7.881360674751614, "learning_rate": 4.781594498294202e-06, "loss": 17.9199, "step": 28913 }, { "epoch": 0.5285246860548011, "grad_norm": 7.4174814585036595, "learning_rate": 4.781298769674675e-06, "loss": 18.175, "step": 28914 }, { "epoch": 0.5285429652512476, "grad_norm": 7.3494059635963, "learning_rate": 4.7810030418216744e-06, "loss": 17.924, "step": 28915 }, { "epoch": 0.528561244447694, "grad_norm": 5.8011214444685555, "learning_rate": 4.780707314736239e-06, "loss": 17.2422, "step": 28916 }, { "epoch": 0.5285795236441406, "grad_norm": 5.654016475660515, "learning_rate": 4.780411588419408e-06, "loss": 17.0475, "step": 28917 }, { "epoch": 0.5285978028405871, "grad_norm": 5.827169075505775, "learning_rate": 4.780115862872213e-06, "loss": 17.4755, "step": 28918 }, { "epoch": 0.5286160820370337, "grad_norm": 6.205762913996296, "learning_rate": 4.779820138095694e-06, "loss": 17.5518, "step": 28919 }, { "epoch": 0.5286343612334802, "grad_norm": 6.311415581467795, "learning_rate": 4.7795244140908845e-06, "loss": 17.6736, "step": 28920 }, { "epoch": 0.5286526404299267, "grad_norm": 6.67700343394037, "learning_rate": 4.779228690858825e-06, "loss": 17.8366, "step": 28921 }, { "epoch": 0.5286709196263732, "grad_norm": 4.573150850930606, "learning_rate": 4.7789329684005494e-06, "loss": 16.7168, "step": 28922 }, { "epoch": 0.5286891988228197, "grad_norm": 5.597287935428627, "learning_rate": 4.778637246717093e-06, "loss": 17.0395, "step": 28923 }, { "epoch": 0.5287074780192663, "grad_norm": 5.664359525650026, "learning_rate": 4.778341525809496e-06, "loss": 17.1411, "step": 28924 }, { "epoch": 0.5287257572157128, "grad_norm": 5.99321077912104, "learning_rate": 4.778045805678792e-06, "loss": 17.6294, "step": 28925 }, { "epoch": 0.5287440364121593, "grad_norm": 6.917453724948669, "learning_rate": 4.777750086326017e-06, "loss": 17.4843, "step": 28926 }, { "epoch": 0.5287623156086059, "grad_norm": 6.605667314495165, "learning_rate": 4.77745436775221e-06, "loss": 17.5499, "step": 28927 }, { "epoch": 0.5287805948050524, "grad_norm": 5.684851313266677, "learning_rate": 4.777158649958407e-06, "loss": 17.1861, "step": 28928 }, { "epoch": 0.5287988740014989, "grad_norm": 5.556901423251663, "learning_rate": 4.776862932945641e-06, "loss": 17.0909, "step": 28929 }, { "epoch": 0.5288171531979454, "grad_norm": 7.655543176993401, "learning_rate": 4.776567216714952e-06, "loss": 17.8859, "step": 28930 }, { "epoch": 0.5288354323943919, "grad_norm": 6.340572271328863, "learning_rate": 4.776271501267377e-06, "loss": 17.4977, "step": 28931 }, { "epoch": 0.5288537115908385, "grad_norm": 6.857807800546154, "learning_rate": 4.775975786603949e-06, "loss": 17.8677, "step": 28932 }, { "epoch": 0.528871990787285, "grad_norm": 6.873222102734115, "learning_rate": 4.775680072725708e-06, "loss": 17.9672, "step": 28933 }, { "epoch": 0.5288902699837316, "grad_norm": 5.399505905359774, "learning_rate": 4.775384359633688e-06, "loss": 17.1648, "step": 28934 }, { "epoch": 0.528908549180178, "grad_norm": 7.014687734891507, "learning_rate": 4.775088647328925e-06, "loss": 17.6098, "step": 28935 }, { "epoch": 0.5289268283766245, "grad_norm": 6.4533415281168285, "learning_rate": 4.7747929358124595e-06, "loss": 17.6374, "step": 28936 }, { "epoch": 0.5289451075730711, "grad_norm": 5.176733574346096, "learning_rate": 4.774497225085323e-06, "loss": 17.2231, "step": 28937 }, { "epoch": 0.5289633867695176, "grad_norm": 5.686831397646956, "learning_rate": 4.774201515148556e-06, "loss": 17.0373, "step": 28938 }, { "epoch": 0.5289816659659642, "grad_norm": 6.436715675593961, "learning_rate": 4.773905806003193e-06, "loss": 17.3613, "step": 28939 }, { "epoch": 0.5289999451624107, "grad_norm": 5.688844862169735, "learning_rate": 4.773610097650268e-06, "loss": 17.2999, "step": 28940 }, { "epoch": 0.5290182243588571, "grad_norm": 6.76112525919375, "learning_rate": 4.773314390090823e-06, "loss": 17.5339, "step": 28941 }, { "epoch": 0.5290365035553037, "grad_norm": 6.521896327749828, "learning_rate": 4.77301868332589e-06, "loss": 17.6273, "step": 28942 }, { "epoch": 0.5290547827517502, "grad_norm": 6.340256283989966, "learning_rate": 4.772722977356507e-06, "loss": 17.3061, "step": 28943 }, { "epoch": 0.5290730619481968, "grad_norm": 7.579506806029583, "learning_rate": 4.77242727218371e-06, "loss": 18.4786, "step": 28944 }, { "epoch": 0.5290913411446433, "grad_norm": 5.748663241019761, "learning_rate": 4.7721315678085364e-06, "loss": 17.0845, "step": 28945 }, { "epoch": 0.5291096203410898, "grad_norm": 7.145918408295226, "learning_rate": 4.77183586423202e-06, "loss": 17.7638, "step": 28946 }, { "epoch": 0.5291278995375364, "grad_norm": 7.635723725813082, "learning_rate": 4.7715401614552e-06, "loss": 17.8344, "step": 28947 }, { "epoch": 0.5291461787339828, "grad_norm": 7.6730095577825885, "learning_rate": 4.771244459479114e-06, "loss": 17.984, "step": 28948 }, { "epoch": 0.5291644579304294, "grad_norm": 6.272690059928203, "learning_rate": 4.770948758304793e-06, "loss": 17.4534, "step": 28949 }, { "epoch": 0.5291827371268759, "grad_norm": 6.233938132678369, "learning_rate": 4.77065305793328e-06, "loss": 17.4707, "step": 28950 }, { "epoch": 0.5292010163233224, "grad_norm": 6.405967934080573, "learning_rate": 4.770357358365605e-06, "loss": 17.6681, "step": 28951 }, { "epoch": 0.529219295519769, "grad_norm": 6.115772521647002, "learning_rate": 4.770061659602809e-06, "loss": 17.5208, "step": 28952 }, { "epoch": 0.5292375747162155, "grad_norm": 9.511511786835976, "learning_rate": 4.769765961645928e-06, "loss": 18.0441, "step": 28953 }, { "epoch": 0.5292558539126619, "grad_norm": 7.4884185891622845, "learning_rate": 4.769470264495995e-06, "loss": 17.8858, "step": 28954 }, { "epoch": 0.5292741331091085, "grad_norm": 6.190588943265005, "learning_rate": 4.769174568154052e-06, "loss": 17.4059, "step": 28955 }, { "epoch": 0.529292412305555, "grad_norm": 5.628795109903128, "learning_rate": 4.768878872621129e-06, "loss": 17.2416, "step": 28956 }, { "epoch": 0.5293106915020016, "grad_norm": 5.738085228999053, "learning_rate": 4.7685831778982656e-06, "loss": 17.4048, "step": 28957 }, { "epoch": 0.5293289706984481, "grad_norm": 6.150061708058466, "learning_rate": 4.7682874839865005e-06, "loss": 17.4161, "step": 28958 }, { "epoch": 0.5293472498948946, "grad_norm": 6.365318945287678, "learning_rate": 4.767991790886866e-06, "loss": 17.4476, "step": 28959 }, { "epoch": 0.5293655290913412, "grad_norm": 6.330916463834369, "learning_rate": 4.7676960986004e-06, "loss": 17.4031, "step": 28960 }, { "epoch": 0.5293838082877876, "grad_norm": 5.79164312744024, "learning_rate": 4.76740040712814e-06, "loss": 17.3763, "step": 28961 }, { "epoch": 0.5294020874842342, "grad_norm": 6.62474071391404, "learning_rate": 4.767104716471122e-06, "loss": 17.5816, "step": 28962 }, { "epoch": 0.5294203666806807, "grad_norm": 6.355841527629139, "learning_rate": 4.766809026630378e-06, "loss": 17.5715, "step": 28963 }, { "epoch": 0.5294386458771272, "grad_norm": 5.728055133066767, "learning_rate": 4.766513337606952e-06, "loss": 17.4855, "step": 28964 }, { "epoch": 0.5294569250735738, "grad_norm": 6.528204563701519, "learning_rate": 4.766217649401875e-06, "loss": 17.4504, "step": 28965 }, { "epoch": 0.5294752042700203, "grad_norm": 5.622772939513933, "learning_rate": 4.7659219620161845e-06, "loss": 17.205, "step": 28966 }, { "epoch": 0.5294934834664669, "grad_norm": 5.9634311597967535, "learning_rate": 4.765626275450918e-06, "loss": 17.397, "step": 28967 }, { "epoch": 0.5295117626629133, "grad_norm": 6.9254273620727655, "learning_rate": 4.76533058970711e-06, "loss": 17.3841, "step": 28968 }, { "epoch": 0.5295300418593598, "grad_norm": 5.182212194444598, "learning_rate": 4.7650349047858e-06, "loss": 16.9626, "step": 28969 }, { "epoch": 0.5295483210558064, "grad_norm": 5.5897143284923665, "learning_rate": 4.76473922068802e-06, "loss": 17.1623, "step": 28970 }, { "epoch": 0.5295666002522529, "grad_norm": 6.547672645916321, "learning_rate": 4.764443537414809e-06, "loss": 17.294, "step": 28971 }, { "epoch": 0.5295848794486995, "grad_norm": 6.335219448620369, "learning_rate": 4.764147854967205e-06, "loss": 17.1706, "step": 28972 }, { "epoch": 0.529603158645146, "grad_norm": 6.512345272851514, "learning_rate": 4.763852173346242e-06, "loss": 17.5477, "step": 28973 }, { "epoch": 0.5296214378415924, "grad_norm": 7.126635933444644, "learning_rate": 4.763556492552954e-06, "loss": 17.4364, "step": 28974 }, { "epoch": 0.529639717038039, "grad_norm": 6.699586223759229, "learning_rate": 4.763260812588381e-06, "loss": 17.5306, "step": 28975 }, { "epoch": 0.5296579962344855, "grad_norm": 6.2770136583172516, "learning_rate": 4.76296513345356e-06, "loss": 17.2066, "step": 28976 }, { "epoch": 0.5296762754309321, "grad_norm": 6.664630342097273, "learning_rate": 4.762669455149523e-06, "loss": 17.6957, "step": 28977 }, { "epoch": 0.5296945546273786, "grad_norm": 6.334087876642642, "learning_rate": 4.7623737776773125e-06, "loss": 17.3825, "step": 28978 }, { "epoch": 0.5297128338238251, "grad_norm": 7.664843603961732, "learning_rate": 4.762078101037959e-06, "loss": 17.9372, "step": 28979 }, { "epoch": 0.5297311130202716, "grad_norm": 6.2353192056173405, "learning_rate": 4.7617824252324995e-06, "loss": 17.274, "step": 28980 }, { "epoch": 0.5297493922167181, "grad_norm": 9.97945252779588, "learning_rate": 4.761486750261975e-06, "loss": 19.1664, "step": 28981 }, { "epoch": 0.5297676714131647, "grad_norm": 7.069265974511238, "learning_rate": 4.761191076127416e-06, "loss": 17.753, "step": 28982 }, { "epoch": 0.5297859506096112, "grad_norm": 6.040235588954766, "learning_rate": 4.760895402829864e-06, "loss": 17.5188, "step": 28983 }, { "epoch": 0.5298042298060577, "grad_norm": 7.13098589882815, "learning_rate": 4.760599730370352e-06, "loss": 17.8517, "step": 28984 }, { "epoch": 0.5298225090025043, "grad_norm": 6.561225109909866, "learning_rate": 4.7603040587499165e-06, "loss": 17.468, "step": 28985 }, { "epoch": 0.5298407881989508, "grad_norm": 6.197798029895168, "learning_rate": 4.7600083879695954e-06, "loss": 17.4974, "step": 28986 }, { "epoch": 0.5298590673953973, "grad_norm": 6.746513134685152, "learning_rate": 4.759712718030425e-06, "loss": 17.6516, "step": 28987 }, { "epoch": 0.5298773465918438, "grad_norm": 8.384064048282763, "learning_rate": 4.759417048933438e-06, "loss": 18.0163, "step": 28988 }, { "epoch": 0.5298956257882903, "grad_norm": 7.545582877617902, "learning_rate": 4.759121380679674e-06, "loss": 17.6561, "step": 28989 }, { "epoch": 0.5299139049847369, "grad_norm": 7.380961876726133, "learning_rate": 4.758825713270171e-06, "loss": 17.7723, "step": 28990 }, { "epoch": 0.5299321841811834, "grad_norm": 7.403514377235956, "learning_rate": 4.758530046705961e-06, "loss": 17.621, "step": 28991 }, { "epoch": 0.52995046337763, "grad_norm": 7.6176915257901765, "learning_rate": 4.758234380988083e-06, "loss": 17.9377, "step": 28992 }, { "epoch": 0.5299687425740764, "grad_norm": 5.851218543396473, "learning_rate": 4.757938716117572e-06, "loss": 17.5295, "step": 28993 }, { "epoch": 0.5299870217705229, "grad_norm": 5.357925665484355, "learning_rate": 4.757643052095464e-06, "loss": 17.1055, "step": 28994 }, { "epoch": 0.5300053009669695, "grad_norm": 6.815637251884852, "learning_rate": 4.757347388922797e-06, "loss": 17.5178, "step": 28995 }, { "epoch": 0.530023580163416, "grad_norm": 5.519619079182495, "learning_rate": 4.757051726600606e-06, "loss": 17.2082, "step": 28996 }, { "epoch": 0.5300418593598626, "grad_norm": 6.284457709423418, "learning_rate": 4.756756065129929e-06, "loss": 17.5382, "step": 28997 }, { "epoch": 0.5300601385563091, "grad_norm": 6.20993452893053, "learning_rate": 4.756460404511799e-06, "loss": 17.1413, "step": 28998 }, { "epoch": 0.5300784177527555, "grad_norm": 6.393283012192629, "learning_rate": 4.7561647447472545e-06, "loss": 17.4477, "step": 28999 }, { "epoch": 0.5300966969492021, "grad_norm": 5.622959309911061, "learning_rate": 4.755869085837333e-06, "loss": 17.1082, "step": 29000 }, { "epoch": 0.5301149761456486, "grad_norm": 5.020867332350183, "learning_rate": 4.755573427783068e-06, "loss": 17.0118, "step": 29001 }, { "epoch": 0.5301332553420952, "grad_norm": 5.03247731379233, "learning_rate": 4.755277770585496e-06, "loss": 16.8708, "step": 29002 }, { "epoch": 0.5301515345385417, "grad_norm": 7.435931180725855, "learning_rate": 4.754982114245655e-06, "loss": 17.8667, "step": 29003 }, { "epoch": 0.5301698137349882, "grad_norm": 6.99032306287987, "learning_rate": 4.754686458764582e-06, "loss": 17.8947, "step": 29004 }, { "epoch": 0.5301880929314348, "grad_norm": 7.200105281796414, "learning_rate": 4.754390804143309e-06, "loss": 18.0311, "step": 29005 }, { "epoch": 0.5302063721278812, "grad_norm": 4.986183366678657, "learning_rate": 4.754095150382876e-06, "loss": 16.965, "step": 29006 }, { "epoch": 0.5302246513243278, "grad_norm": 6.233305622340141, "learning_rate": 4.753799497484319e-06, "loss": 17.4663, "step": 29007 }, { "epoch": 0.5302429305207743, "grad_norm": 5.673532705218475, "learning_rate": 4.753503845448672e-06, "loss": 17.0611, "step": 29008 }, { "epoch": 0.5302612097172208, "grad_norm": 5.690178376425513, "learning_rate": 4.753208194276974e-06, "loss": 17.1658, "step": 29009 }, { "epoch": 0.5302794889136674, "grad_norm": 6.673679623648136, "learning_rate": 4.7529125439702594e-06, "loss": 17.9059, "step": 29010 }, { "epoch": 0.5302977681101139, "grad_norm": 6.457653997418939, "learning_rate": 4.752616894529564e-06, "loss": 17.2834, "step": 29011 }, { "epoch": 0.5303160473065605, "grad_norm": 5.788563805288343, "learning_rate": 4.752321245955927e-06, "loss": 17.2688, "step": 29012 }, { "epoch": 0.5303343265030069, "grad_norm": 7.02269511892852, "learning_rate": 4.752025598250379e-06, "loss": 17.782, "step": 29013 }, { "epoch": 0.5303526056994534, "grad_norm": 6.536583697592861, "learning_rate": 4.751729951413963e-06, "loss": 17.5823, "step": 29014 }, { "epoch": 0.5303708848959, "grad_norm": 6.175956006660992, "learning_rate": 4.7514343054477105e-06, "loss": 17.1392, "step": 29015 }, { "epoch": 0.5303891640923465, "grad_norm": 6.52542404024492, "learning_rate": 4.751138660352659e-06, "loss": 17.2988, "step": 29016 }, { "epoch": 0.5304074432887931, "grad_norm": 5.389273970877506, "learning_rate": 4.750843016129846e-06, "loss": 17.0757, "step": 29017 }, { "epoch": 0.5304257224852396, "grad_norm": 6.227759632651717, "learning_rate": 4.750547372780308e-06, "loss": 17.5329, "step": 29018 }, { "epoch": 0.530444001681686, "grad_norm": 7.032243624524673, "learning_rate": 4.750251730305077e-06, "loss": 18.0772, "step": 29019 }, { "epoch": 0.5304622808781326, "grad_norm": 8.65552082768172, "learning_rate": 4.749956088705192e-06, "loss": 17.8541, "step": 29020 }, { "epoch": 0.5304805600745791, "grad_norm": 7.130319468662974, "learning_rate": 4.749660447981691e-06, "loss": 17.8018, "step": 29021 }, { "epoch": 0.5304988392710256, "grad_norm": 7.080552813274937, "learning_rate": 4.749364808135607e-06, "loss": 17.7129, "step": 29022 }, { "epoch": 0.5305171184674722, "grad_norm": 5.794984351054141, "learning_rate": 4.749069169167979e-06, "loss": 17.3477, "step": 29023 }, { "epoch": 0.5305353976639187, "grad_norm": 5.624996959233557, "learning_rate": 4.7487735310798405e-06, "loss": 17.1875, "step": 29024 }, { "epoch": 0.5305536768603653, "grad_norm": 5.469560987892032, "learning_rate": 4.7484778938722285e-06, "loss": 17.2285, "step": 29025 }, { "epoch": 0.5305719560568117, "grad_norm": 5.890981220322474, "learning_rate": 4.748182257546181e-06, "loss": 16.9253, "step": 29026 }, { "epoch": 0.5305902352532582, "grad_norm": 8.778371523133774, "learning_rate": 4.747886622102731e-06, "loss": 18.3411, "step": 29027 }, { "epoch": 0.5306085144497048, "grad_norm": 5.650137184782942, "learning_rate": 4.747590987542919e-06, "loss": 17.1306, "step": 29028 }, { "epoch": 0.5306267936461513, "grad_norm": 6.961102745151707, "learning_rate": 4.747295353867778e-06, "loss": 18.2216, "step": 29029 }, { "epoch": 0.5306450728425979, "grad_norm": 6.470423668539671, "learning_rate": 4.7469997210783435e-06, "loss": 17.6332, "step": 29030 }, { "epoch": 0.5306633520390444, "grad_norm": 7.04058675641339, "learning_rate": 4.746704089175655e-06, "loss": 17.6692, "step": 29031 }, { "epoch": 0.5306816312354908, "grad_norm": 7.120824994109516, "learning_rate": 4.7464084581607465e-06, "loss": 17.4945, "step": 29032 }, { "epoch": 0.5306999104319374, "grad_norm": 7.813392016753592, "learning_rate": 4.746112828034653e-06, "loss": 17.6192, "step": 29033 }, { "epoch": 0.5307181896283839, "grad_norm": 6.513456589941889, "learning_rate": 4.745817198798412e-06, "loss": 17.3292, "step": 29034 }, { "epoch": 0.5307364688248305, "grad_norm": 6.6167338676562135, "learning_rate": 4.745521570453061e-06, "loss": 17.394, "step": 29035 }, { "epoch": 0.530754748021277, "grad_norm": 8.437965719621715, "learning_rate": 4.745225942999633e-06, "loss": 18.5355, "step": 29036 }, { "epoch": 0.5307730272177235, "grad_norm": 8.917662320778536, "learning_rate": 4.744930316439168e-06, "loss": 18.2374, "step": 29037 }, { "epoch": 0.53079130641417, "grad_norm": 6.990175887722325, "learning_rate": 4.744634690772699e-06, "loss": 17.5489, "step": 29038 }, { "epoch": 0.5308095856106165, "grad_norm": 6.7485057341990675, "learning_rate": 4.744339066001262e-06, "loss": 17.3745, "step": 29039 }, { "epoch": 0.5308278648070631, "grad_norm": 7.143291266208228, "learning_rate": 4.744043442125897e-06, "loss": 17.8604, "step": 29040 }, { "epoch": 0.5308461440035096, "grad_norm": 6.971640177308801, "learning_rate": 4.743747819147637e-06, "loss": 17.62, "step": 29041 }, { "epoch": 0.5308644231999561, "grad_norm": 7.976051122820207, "learning_rate": 4.743452197067516e-06, "loss": 17.6011, "step": 29042 }, { "epoch": 0.5308827023964027, "grad_norm": 6.391408805176334, "learning_rate": 4.743156575886575e-06, "loss": 17.0996, "step": 29043 }, { "epoch": 0.5309009815928492, "grad_norm": 4.956450882770411, "learning_rate": 4.742860955605846e-06, "loss": 16.9487, "step": 29044 }, { "epoch": 0.5309192607892957, "grad_norm": 7.895859674510243, "learning_rate": 4.74256533622637e-06, "loss": 17.883, "step": 29045 }, { "epoch": 0.5309375399857422, "grad_norm": 7.879627065172488, "learning_rate": 4.74226971774918e-06, "loss": 17.886, "step": 29046 }, { "epoch": 0.5309558191821887, "grad_norm": 7.166142253316326, "learning_rate": 4.74197410017531e-06, "loss": 17.5091, "step": 29047 }, { "epoch": 0.5309740983786353, "grad_norm": 6.595705922538593, "learning_rate": 4.741678483505799e-06, "loss": 17.5326, "step": 29048 }, { "epoch": 0.5309923775750818, "grad_norm": 7.3835527577926845, "learning_rate": 4.741382867741684e-06, "loss": 18.1946, "step": 29049 }, { "epoch": 0.5310106567715284, "grad_norm": 6.338145646823406, "learning_rate": 4.741087252883998e-06, "loss": 17.4536, "step": 29050 }, { "epoch": 0.5310289359679748, "grad_norm": 11.185395669690315, "learning_rate": 4.74079163893378e-06, "loss": 17.9135, "step": 29051 }, { "epoch": 0.5310472151644213, "grad_norm": 7.885925422033358, "learning_rate": 4.740496025892064e-06, "loss": 18.2055, "step": 29052 }, { "epoch": 0.5310654943608679, "grad_norm": 6.817404549241904, "learning_rate": 4.740200413759886e-06, "loss": 17.429, "step": 29053 }, { "epoch": 0.5310837735573144, "grad_norm": 6.044258696525604, "learning_rate": 4.739904802538284e-06, "loss": 17.6657, "step": 29054 }, { "epoch": 0.531102052753761, "grad_norm": 9.037642839197277, "learning_rate": 4.739609192228295e-06, "loss": 17.7689, "step": 29055 }, { "epoch": 0.5311203319502075, "grad_norm": 4.846138987507346, "learning_rate": 4.7393135828309495e-06, "loss": 16.8368, "step": 29056 }, { "epoch": 0.531138611146654, "grad_norm": 10.03554536509534, "learning_rate": 4.7390179743472895e-06, "loss": 17.9387, "step": 29057 }, { "epoch": 0.5311568903431005, "grad_norm": 6.869602546938215, "learning_rate": 4.738722366778346e-06, "loss": 17.7339, "step": 29058 }, { "epoch": 0.531175169539547, "grad_norm": 5.454949928664412, "learning_rate": 4.738426760125162e-06, "loss": 17.0717, "step": 29059 }, { "epoch": 0.5311934487359936, "grad_norm": 5.857091816580592, "learning_rate": 4.738131154388768e-06, "loss": 17.1165, "step": 29060 }, { "epoch": 0.5312117279324401, "grad_norm": 6.930273201413045, "learning_rate": 4.737835549570201e-06, "loss": 17.7204, "step": 29061 }, { "epoch": 0.5312300071288866, "grad_norm": 5.431037714773593, "learning_rate": 4.737539945670498e-06, "loss": 17.1813, "step": 29062 }, { "epoch": 0.5312482863253332, "grad_norm": 6.5420044744411845, "learning_rate": 4.737244342690696e-06, "loss": 17.4828, "step": 29063 }, { "epoch": 0.5312665655217796, "grad_norm": 7.036392217452092, "learning_rate": 4.736948740631827e-06, "loss": 17.8612, "step": 29064 }, { "epoch": 0.5312848447182262, "grad_norm": 7.429120135752747, "learning_rate": 4.736653139494933e-06, "loss": 18.002, "step": 29065 }, { "epoch": 0.5313031239146727, "grad_norm": 7.647763917020927, "learning_rate": 4.736357539281045e-06, "loss": 18.3326, "step": 29066 }, { "epoch": 0.5313214031111192, "grad_norm": 6.714500069030933, "learning_rate": 4.7360619399912e-06, "loss": 17.7183, "step": 29067 }, { "epoch": 0.5313396823075658, "grad_norm": 6.437689205093623, "learning_rate": 4.735766341626437e-06, "loss": 17.5725, "step": 29068 }, { "epoch": 0.5313579615040123, "grad_norm": 5.896080567498599, "learning_rate": 4.735470744187789e-06, "loss": 17.1384, "step": 29069 }, { "epoch": 0.5313762407004589, "grad_norm": 5.706152266296976, "learning_rate": 4.735175147676294e-06, "loss": 17.4059, "step": 29070 }, { "epoch": 0.5313945198969053, "grad_norm": 6.558307824820367, "learning_rate": 4.734879552092986e-06, "loss": 17.5545, "step": 29071 }, { "epoch": 0.5314127990933518, "grad_norm": 6.6419850563836444, "learning_rate": 4.734583957438903e-06, "loss": 17.7131, "step": 29072 }, { "epoch": 0.5314310782897984, "grad_norm": 5.426127406448526, "learning_rate": 4.7342883637150796e-06, "loss": 17.0622, "step": 29073 }, { "epoch": 0.5314493574862449, "grad_norm": 6.008733891505808, "learning_rate": 4.7339927709225524e-06, "loss": 17.4839, "step": 29074 }, { "epoch": 0.5314676366826915, "grad_norm": 5.623738962763688, "learning_rate": 4.733697179062356e-06, "loss": 17.3007, "step": 29075 }, { "epoch": 0.531485915879138, "grad_norm": 5.616239444795882, "learning_rate": 4.733401588135531e-06, "loss": 17.0733, "step": 29076 }, { "epoch": 0.5315041950755844, "grad_norm": 7.862884441361074, "learning_rate": 4.73310599814311e-06, "loss": 17.9129, "step": 29077 }, { "epoch": 0.531522474272031, "grad_norm": 5.009030678254554, "learning_rate": 4.732810409086127e-06, "loss": 16.9358, "step": 29078 }, { "epoch": 0.5315407534684775, "grad_norm": 5.334322342518181, "learning_rate": 4.732514820965621e-06, "loss": 17.1485, "step": 29079 }, { "epoch": 0.5315590326649241, "grad_norm": 6.000890507008084, "learning_rate": 4.73221923378263e-06, "loss": 17.4316, "step": 29080 }, { "epoch": 0.5315773118613706, "grad_norm": 6.659016598136487, "learning_rate": 4.731923647538184e-06, "loss": 17.3543, "step": 29081 }, { "epoch": 0.5315955910578171, "grad_norm": 6.61833062327974, "learning_rate": 4.731628062233325e-06, "loss": 17.6285, "step": 29082 }, { "epoch": 0.5316138702542637, "grad_norm": 5.462550395014614, "learning_rate": 4.731332477869084e-06, "loss": 16.8888, "step": 29083 }, { "epoch": 0.5316321494507101, "grad_norm": 8.559596390802898, "learning_rate": 4.731036894446499e-06, "loss": 18.3411, "step": 29084 }, { "epoch": 0.5316504286471567, "grad_norm": 5.5304459159182136, "learning_rate": 4.730741311966609e-06, "loss": 17.2945, "step": 29085 }, { "epoch": 0.5316687078436032, "grad_norm": 6.4386947966121255, "learning_rate": 4.730445730430447e-06, "loss": 17.6291, "step": 29086 }, { "epoch": 0.5316869870400497, "grad_norm": 6.924546856510563, "learning_rate": 4.730150149839047e-06, "loss": 17.9036, "step": 29087 }, { "epoch": 0.5317052662364963, "grad_norm": 8.26086054422182, "learning_rate": 4.729854570193448e-06, "loss": 18.4739, "step": 29088 }, { "epoch": 0.5317235454329428, "grad_norm": 5.911957173509042, "learning_rate": 4.729558991494685e-06, "loss": 17.5701, "step": 29089 }, { "epoch": 0.5317418246293892, "grad_norm": 7.3927788013912155, "learning_rate": 4.7292634137437965e-06, "loss": 17.8532, "step": 29090 }, { "epoch": 0.5317601038258358, "grad_norm": 6.669605504379684, "learning_rate": 4.728967836941816e-06, "loss": 17.3569, "step": 29091 }, { "epoch": 0.5317783830222823, "grad_norm": 7.796781218527904, "learning_rate": 4.728672261089777e-06, "loss": 17.879, "step": 29092 }, { "epoch": 0.5317966622187289, "grad_norm": 6.581204812453982, "learning_rate": 4.72837668618872e-06, "loss": 17.4378, "step": 29093 }, { "epoch": 0.5318149414151754, "grad_norm": 5.786141109773239, "learning_rate": 4.72808111223968e-06, "loss": 17.3518, "step": 29094 }, { "epoch": 0.5318332206116219, "grad_norm": 5.580146707013943, "learning_rate": 4.72778553924369e-06, "loss": 17.1563, "step": 29095 }, { "epoch": 0.5318514998080685, "grad_norm": 6.7681026442740535, "learning_rate": 4.72748996720179e-06, "loss": 17.6506, "step": 29096 }, { "epoch": 0.5318697790045149, "grad_norm": 6.325593306072917, "learning_rate": 4.727194396115013e-06, "loss": 17.4579, "step": 29097 }, { "epoch": 0.5318880582009615, "grad_norm": 7.750348884085031, "learning_rate": 4.7268988259843945e-06, "loss": 18.1183, "step": 29098 }, { "epoch": 0.531906337397408, "grad_norm": 7.378992994794502, "learning_rate": 4.7266032568109745e-06, "loss": 18.222, "step": 29099 }, { "epoch": 0.5319246165938545, "grad_norm": 7.214803875822746, "learning_rate": 4.726307688595787e-06, "loss": 17.9459, "step": 29100 }, { "epoch": 0.5319428957903011, "grad_norm": 6.175586950398687, "learning_rate": 4.726012121339864e-06, "loss": 17.5421, "step": 29101 }, { "epoch": 0.5319611749867476, "grad_norm": 5.526292871774039, "learning_rate": 4.725716555044246e-06, "loss": 17.3515, "step": 29102 }, { "epoch": 0.5319794541831941, "grad_norm": 6.173101937782035, "learning_rate": 4.72542098970997e-06, "loss": 17.2722, "step": 29103 }, { "epoch": 0.5319977333796406, "grad_norm": 7.939720874584466, "learning_rate": 4.725125425338066e-06, "loss": 17.6554, "step": 29104 }, { "epoch": 0.5320160125760871, "grad_norm": 7.102072818870398, "learning_rate": 4.724829861929576e-06, "loss": 18.0799, "step": 29105 }, { "epoch": 0.5320342917725337, "grad_norm": 6.193719341303695, "learning_rate": 4.724534299485532e-06, "loss": 17.3817, "step": 29106 }, { "epoch": 0.5320525709689802, "grad_norm": 6.710891402253766, "learning_rate": 4.724238738006972e-06, "loss": 17.5157, "step": 29107 }, { "epoch": 0.5320708501654268, "grad_norm": 6.289553488924047, "learning_rate": 4.723943177494932e-06, "loss": 17.2921, "step": 29108 }, { "epoch": 0.5320891293618732, "grad_norm": 7.320087808991515, "learning_rate": 4.723647617950446e-06, "loss": 18.1539, "step": 29109 }, { "epoch": 0.5321074085583197, "grad_norm": 5.961258353746868, "learning_rate": 4.723352059374552e-06, "loss": 17.329, "step": 29110 }, { "epoch": 0.5321256877547663, "grad_norm": 6.051518226625935, "learning_rate": 4.723056501768285e-06, "loss": 17.4943, "step": 29111 }, { "epoch": 0.5321439669512128, "grad_norm": 6.3104499872196715, "learning_rate": 4.722760945132679e-06, "loss": 17.4186, "step": 29112 }, { "epoch": 0.5321622461476594, "grad_norm": 6.005834493972101, "learning_rate": 4.722465389468775e-06, "loss": 17.5438, "step": 29113 }, { "epoch": 0.5321805253441059, "grad_norm": 6.753467752413835, "learning_rate": 4.722169834777605e-06, "loss": 18.0328, "step": 29114 }, { "epoch": 0.5321988045405524, "grad_norm": 6.127705081696299, "learning_rate": 4.7218742810602035e-06, "loss": 17.5204, "step": 29115 }, { "epoch": 0.5322170837369989, "grad_norm": 5.364277239290237, "learning_rate": 4.72157872831761e-06, "loss": 17.2414, "step": 29116 }, { "epoch": 0.5322353629334454, "grad_norm": 4.817707614412788, "learning_rate": 4.72128317655086e-06, "loss": 16.9568, "step": 29117 }, { "epoch": 0.532253642129892, "grad_norm": 5.5802778958409025, "learning_rate": 4.720987625760985e-06, "loss": 17.2708, "step": 29118 }, { "epoch": 0.5322719213263385, "grad_norm": 5.350329158205401, "learning_rate": 4.720692075949027e-06, "loss": 17.2927, "step": 29119 }, { "epoch": 0.532290200522785, "grad_norm": 6.25668171940735, "learning_rate": 4.720396527116018e-06, "loss": 17.4023, "step": 29120 }, { "epoch": 0.5323084797192316, "grad_norm": 6.244412090560745, "learning_rate": 4.720100979262995e-06, "loss": 17.5831, "step": 29121 }, { "epoch": 0.532326758915678, "grad_norm": 7.59218018480341, "learning_rate": 4.719805432390995e-06, "loss": 17.7089, "step": 29122 }, { "epoch": 0.5323450381121246, "grad_norm": 9.935650537819685, "learning_rate": 4.7195098865010504e-06, "loss": 18.3908, "step": 29123 }, { "epoch": 0.5323633173085711, "grad_norm": 6.495075807465171, "learning_rate": 4.719214341594201e-06, "loss": 17.6079, "step": 29124 }, { "epoch": 0.5323815965050176, "grad_norm": 7.914936385801918, "learning_rate": 4.7189187976714804e-06, "loss": 18.2558, "step": 29125 }, { "epoch": 0.5323998757014642, "grad_norm": 6.230754304976618, "learning_rate": 4.718623254733924e-06, "loss": 17.5205, "step": 29126 }, { "epoch": 0.5324181548979107, "grad_norm": 5.421609703568032, "learning_rate": 4.71832771278257e-06, "loss": 17.2301, "step": 29127 }, { "epoch": 0.5324364340943573, "grad_norm": 6.478292233647558, "learning_rate": 4.718032171818453e-06, "loss": 17.5168, "step": 29128 }, { "epoch": 0.5324547132908037, "grad_norm": 6.688539822736038, "learning_rate": 4.717736631842608e-06, "loss": 17.6626, "step": 29129 }, { "epoch": 0.5324729924872502, "grad_norm": 6.928218580494495, "learning_rate": 4.717441092856072e-06, "loss": 18.171, "step": 29130 }, { "epoch": 0.5324912716836968, "grad_norm": 6.56043645920163, "learning_rate": 4.7171455548598816e-06, "loss": 17.7995, "step": 29131 }, { "epoch": 0.5325095508801433, "grad_norm": 6.150982416930782, "learning_rate": 4.7168500178550695e-06, "loss": 17.4851, "step": 29132 }, { "epoch": 0.5325278300765899, "grad_norm": 6.320931657999938, "learning_rate": 4.716554481842674e-06, "loss": 17.5192, "step": 29133 }, { "epoch": 0.5325461092730364, "grad_norm": 4.759395082450143, "learning_rate": 4.716258946823732e-06, "loss": 16.7456, "step": 29134 }, { "epoch": 0.5325643884694828, "grad_norm": 7.486771062381638, "learning_rate": 4.715963412799276e-06, "loss": 17.7593, "step": 29135 }, { "epoch": 0.5325826676659294, "grad_norm": 5.549348222321404, "learning_rate": 4.715667879770345e-06, "loss": 17.1854, "step": 29136 }, { "epoch": 0.5326009468623759, "grad_norm": 5.915789294304772, "learning_rate": 4.715372347737971e-06, "loss": 17.4329, "step": 29137 }, { "epoch": 0.5326192260588225, "grad_norm": 9.867560162165196, "learning_rate": 4.715076816703194e-06, "loss": 17.8107, "step": 29138 }, { "epoch": 0.532637505255269, "grad_norm": 6.532373079707325, "learning_rate": 4.714781286667048e-06, "loss": 17.5884, "step": 29139 }, { "epoch": 0.5326557844517155, "grad_norm": 5.020319197667657, "learning_rate": 4.714485757630568e-06, "loss": 16.8819, "step": 29140 }, { "epoch": 0.532674063648162, "grad_norm": 7.2737297847563696, "learning_rate": 4.714190229594792e-06, "loss": 17.9487, "step": 29141 }, { "epoch": 0.5326923428446085, "grad_norm": 6.0278678970210535, "learning_rate": 4.713894702560754e-06, "loss": 17.3564, "step": 29142 }, { "epoch": 0.5327106220410551, "grad_norm": 8.863413400127481, "learning_rate": 4.713599176529488e-06, "loss": 18.677, "step": 29143 }, { "epoch": 0.5327289012375016, "grad_norm": 6.13073041808526, "learning_rate": 4.713303651502036e-06, "loss": 17.609, "step": 29144 }, { "epoch": 0.5327471804339481, "grad_norm": 7.0548541991269555, "learning_rate": 4.713008127479429e-06, "loss": 17.8444, "step": 29145 }, { "epoch": 0.5327654596303947, "grad_norm": 6.3594455768943385, "learning_rate": 4.712712604462701e-06, "loss": 17.6913, "step": 29146 }, { "epoch": 0.5327837388268412, "grad_norm": 7.358024674587688, "learning_rate": 4.712417082452892e-06, "loss": 18.1915, "step": 29147 }, { "epoch": 0.5328020180232877, "grad_norm": 7.27916880003968, "learning_rate": 4.7121215614510365e-06, "loss": 17.7422, "step": 29148 }, { "epoch": 0.5328202972197342, "grad_norm": 6.10328131209803, "learning_rate": 4.711826041458169e-06, "loss": 17.4295, "step": 29149 }, { "epoch": 0.5328385764161807, "grad_norm": 6.993145169384505, "learning_rate": 4.711530522475327e-06, "loss": 17.9042, "step": 29150 }, { "epoch": 0.5328568556126273, "grad_norm": 6.224101834741438, "learning_rate": 4.711235004503544e-06, "loss": 17.4397, "step": 29151 }, { "epoch": 0.5328751348090738, "grad_norm": 6.784740796828709, "learning_rate": 4.7109394875438585e-06, "loss": 17.503, "step": 29152 }, { "epoch": 0.5328934140055204, "grad_norm": 5.454179536820799, "learning_rate": 4.710643971597306e-06, "loss": 17.3178, "step": 29153 }, { "epoch": 0.5329116932019669, "grad_norm": 5.419832965479272, "learning_rate": 4.710348456664919e-06, "loss": 17.1325, "step": 29154 }, { "epoch": 0.5329299723984133, "grad_norm": 6.468622716265971, "learning_rate": 4.710052942747738e-06, "loss": 17.3501, "step": 29155 }, { "epoch": 0.5329482515948599, "grad_norm": 5.677443523185989, "learning_rate": 4.709757429846795e-06, "loss": 17.5229, "step": 29156 }, { "epoch": 0.5329665307913064, "grad_norm": 6.436444698835134, "learning_rate": 4.709461917963126e-06, "loss": 17.6191, "step": 29157 }, { "epoch": 0.5329848099877529, "grad_norm": 6.253066463006159, "learning_rate": 4.709166407097769e-06, "loss": 17.4264, "step": 29158 }, { "epoch": 0.5330030891841995, "grad_norm": 5.261050823052316, "learning_rate": 4.70887089725176e-06, "loss": 17.2113, "step": 29159 }, { "epoch": 0.533021368380646, "grad_norm": 6.257819167140041, "learning_rate": 4.708575388426131e-06, "loss": 17.4203, "step": 29160 }, { "epoch": 0.5330396475770925, "grad_norm": 6.513401227549751, "learning_rate": 4.70827988062192e-06, "loss": 17.6654, "step": 29161 }, { "epoch": 0.533057926773539, "grad_norm": 5.635081993814778, "learning_rate": 4.707984373840164e-06, "loss": 17.2523, "step": 29162 }, { "epoch": 0.5330762059699855, "grad_norm": 8.04649228287663, "learning_rate": 4.707688868081896e-06, "loss": 17.7352, "step": 29163 }, { "epoch": 0.5330944851664321, "grad_norm": 5.31590699965306, "learning_rate": 4.707393363348154e-06, "loss": 17.1076, "step": 29164 }, { "epoch": 0.5331127643628786, "grad_norm": 5.554912643104351, "learning_rate": 4.707097859639972e-06, "loss": 17.2083, "step": 29165 }, { "epoch": 0.5331310435593252, "grad_norm": 7.071426242434656, "learning_rate": 4.7068023569583865e-06, "loss": 17.7656, "step": 29166 }, { "epoch": 0.5331493227557716, "grad_norm": 5.76104893770655, "learning_rate": 4.706506855304435e-06, "loss": 17.2488, "step": 29167 }, { "epoch": 0.5331676019522181, "grad_norm": 6.712165285165031, "learning_rate": 4.7062113546791496e-06, "loss": 17.3628, "step": 29168 }, { "epoch": 0.5331858811486647, "grad_norm": 6.7751911834935195, "learning_rate": 4.70591585508357e-06, "loss": 17.6679, "step": 29169 }, { "epoch": 0.5332041603451112, "grad_norm": 6.4036925971669225, "learning_rate": 4.705620356518729e-06, "loss": 17.7652, "step": 29170 }, { "epoch": 0.5332224395415578, "grad_norm": 5.620878637163123, "learning_rate": 4.705324858985662e-06, "loss": 17.3258, "step": 29171 }, { "epoch": 0.5332407187380043, "grad_norm": 6.484494676186936, "learning_rate": 4.705029362485407e-06, "loss": 17.6879, "step": 29172 }, { "epoch": 0.5332589979344508, "grad_norm": 6.126985357715734, "learning_rate": 4.704733867018999e-06, "loss": 17.608, "step": 29173 }, { "epoch": 0.5332772771308973, "grad_norm": 6.061217895217802, "learning_rate": 4.704438372587471e-06, "loss": 17.3942, "step": 29174 }, { "epoch": 0.5332955563273438, "grad_norm": 6.91946908138377, "learning_rate": 4.704142879191862e-06, "loss": 17.885, "step": 29175 }, { "epoch": 0.5333138355237904, "grad_norm": 5.638756120268821, "learning_rate": 4.703847386833207e-06, "loss": 17.4492, "step": 29176 }, { "epoch": 0.5333321147202369, "grad_norm": 7.360688988167568, "learning_rate": 4.70355189551254e-06, "loss": 17.8167, "step": 29177 }, { "epoch": 0.5333503939166834, "grad_norm": 5.554017683149972, "learning_rate": 4.7032564052309e-06, "loss": 17.3239, "step": 29178 }, { "epoch": 0.53336867311313, "grad_norm": 7.220440437032762, "learning_rate": 4.7029609159893196e-06, "loss": 17.9182, "step": 29179 }, { "epoch": 0.5333869523095764, "grad_norm": 5.617028454349031, "learning_rate": 4.702665427788833e-06, "loss": 17.1134, "step": 29180 }, { "epoch": 0.533405231506023, "grad_norm": 7.210935894788144, "learning_rate": 4.702369940630482e-06, "loss": 17.7359, "step": 29181 }, { "epoch": 0.5334235107024695, "grad_norm": 5.442315779787362, "learning_rate": 4.702074454515296e-06, "loss": 17.3984, "step": 29182 }, { "epoch": 0.533441789898916, "grad_norm": 5.732815744973244, "learning_rate": 4.701778969444315e-06, "loss": 17.2546, "step": 29183 }, { "epoch": 0.5334600690953626, "grad_norm": 6.875816693462245, "learning_rate": 4.701483485418571e-06, "loss": 17.7802, "step": 29184 }, { "epoch": 0.5334783482918091, "grad_norm": 5.564279309972907, "learning_rate": 4.701188002439101e-06, "loss": 17.452, "step": 29185 }, { "epoch": 0.5334966274882557, "grad_norm": 5.306153078862511, "learning_rate": 4.700892520506944e-06, "loss": 17.2651, "step": 29186 }, { "epoch": 0.5335149066847021, "grad_norm": 14.53928973098706, "learning_rate": 4.700597039623133e-06, "loss": 17.6077, "step": 29187 }, { "epoch": 0.5335331858811486, "grad_norm": 6.369984681166308, "learning_rate": 4.7003015597887e-06, "loss": 17.5793, "step": 29188 }, { "epoch": 0.5335514650775952, "grad_norm": 7.481302517731318, "learning_rate": 4.700006081004685e-06, "loss": 18.1637, "step": 29189 }, { "epoch": 0.5335697442740417, "grad_norm": 6.582023609889075, "learning_rate": 4.699710603272125e-06, "loss": 17.6102, "step": 29190 }, { "epoch": 0.5335880234704883, "grad_norm": 6.980630320109185, "learning_rate": 4.699415126592051e-06, "loss": 17.8523, "step": 29191 }, { "epoch": 0.5336063026669348, "grad_norm": 5.382278433913923, "learning_rate": 4.699119650965502e-06, "loss": 16.9737, "step": 29192 }, { "epoch": 0.5336245818633812, "grad_norm": 5.500432465719919, "learning_rate": 4.698824176393512e-06, "loss": 17.2951, "step": 29193 }, { "epoch": 0.5336428610598278, "grad_norm": 5.730394696563894, "learning_rate": 4.698528702877116e-06, "loss": 17.4959, "step": 29194 }, { "epoch": 0.5336611402562743, "grad_norm": 5.30363362092714, "learning_rate": 4.6982332304173524e-06, "loss": 17.0591, "step": 29195 }, { "epoch": 0.5336794194527209, "grad_norm": 5.358670228367869, "learning_rate": 4.697937759015254e-06, "loss": 17.1578, "step": 29196 }, { "epoch": 0.5336976986491674, "grad_norm": 8.413072866315387, "learning_rate": 4.697642288671858e-06, "loss": 18.0521, "step": 29197 }, { "epoch": 0.5337159778456139, "grad_norm": 4.688684094204592, "learning_rate": 4.697346819388201e-06, "loss": 16.8042, "step": 29198 }, { "epoch": 0.5337342570420605, "grad_norm": 6.053872229728858, "learning_rate": 4.697051351165314e-06, "loss": 17.4931, "step": 29199 }, { "epoch": 0.5337525362385069, "grad_norm": 5.765015298125997, "learning_rate": 4.6967558840042395e-06, "loss": 17.0297, "step": 29200 }, { "epoch": 0.5337708154349535, "grad_norm": 5.818301652851261, "learning_rate": 4.696460417906007e-06, "loss": 17.0998, "step": 29201 }, { "epoch": 0.5337890946314, "grad_norm": 5.126136644576303, "learning_rate": 4.696164952871654e-06, "loss": 17.2133, "step": 29202 }, { "epoch": 0.5338073738278465, "grad_norm": 5.614543121003322, "learning_rate": 4.695869488902218e-06, "loss": 17.2043, "step": 29203 }, { "epoch": 0.5338256530242931, "grad_norm": 5.950383837368383, "learning_rate": 4.695574025998733e-06, "loss": 17.3344, "step": 29204 }, { "epoch": 0.5338439322207396, "grad_norm": 6.149957103878116, "learning_rate": 4.6952785641622326e-06, "loss": 17.3925, "step": 29205 }, { "epoch": 0.5338622114171861, "grad_norm": 5.709616959971137, "learning_rate": 4.694983103393756e-06, "loss": 17.1373, "step": 29206 }, { "epoch": 0.5338804906136326, "grad_norm": 7.2074863095903705, "learning_rate": 4.694687643694338e-06, "loss": 17.7656, "step": 29207 }, { "epoch": 0.5338987698100791, "grad_norm": 6.052629056621195, "learning_rate": 4.694392185065011e-06, "loss": 17.3853, "step": 29208 }, { "epoch": 0.5339170490065257, "grad_norm": 6.6373186264798445, "learning_rate": 4.694096727506815e-06, "loss": 17.3346, "step": 29209 }, { "epoch": 0.5339353282029722, "grad_norm": 6.387124795224236, "learning_rate": 4.693801271020783e-06, "loss": 17.9033, "step": 29210 }, { "epoch": 0.5339536073994188, "grad_norm": 6.223251258431593, "learning_rate": 4.693505815607949e-06, "loss": 17.4301, "step": 29211 }, { "epoch": 0.5339718865958653, "grad_norm": 6.576451745258645, "learning_rate": 4.693210361269352e-06, "loss": 17.6064, "step": 29212 }, { "epoch": 0.5339901657923117, "grad_norm": 6.325741136202451, "learning_rate": 4.692914908006026e-06, "loss": 17.3868, "step": 29213 }, { "epoch": 0.5340084449887583, "grad_norm": 5.453538743658066, "learning_rate": 4.692619455819008e-06, "loss": 17.3604, "step": 29214 }, { "epoch": 0.5340267241852048, "grad_norm": 6.395024201833934, "learning_rate": 4.69232400470933e-06, "loss": 17.4291, "step": 29215 }, { "epoch": 0.5340450033816514, "grad_norm": 5.261791959142509, "learning_rate": 4.692028554678029e-06, "loss": 17.1114, "step": 29216 }, { "epoch": 0.5340632825780979, "grad_norm": 6.792270631815648, "learning_rate": 4.691733105726144e-06, "loss": 17.5678, "step": 29217 }, { "epoch": 0.5340815617745444, "grad_norm": 6.80209010429963, "learning_rate": 4.691437657854707e-06, "loss": 17.6784, "step": 29218 }, { "epoch": 0.534099840970991, "grad_norm": 6.498573399814959, "learning_rate": 4.691142211064753e-06, "loss": 17.5499, "step": 29219 }, { "epoch": 0.5341181201674374, "grad_norm": 6.3261239509177045, "learning_rate": 4.690846765357319e-06, "loss": 17.424, "step": 29220 }, { "epoch": 0.534136399363884, "grad_norm": 6.458664907486099, "learning_rate": 4.690551320733442e-06, "loss": 17.4369, "step": 29221 }, { "epoch": 0.5341546785603305, "grad_norm": 6.926856980821773, "learning_rate": 4.690255877194152e-06, "loss": 17.9459, "step": 29222 }, { "epoch": 0.534172957756777, "grad_norm": 5.482993153007825, "learning_rate": 4.6899604347404925e-06, "loss": 17.0677, "step": 29223 }, { "epoch": 0.5341912369532236, "grad_norm": 5.95549853159849, "learning_rate": 4.689664993373493e-06, "loss": 17.4105, "step": 29224 }, { "epoch": 0.53420951614967, "grad_norm": 5.7026105701250716, "learning_rate": 4.689369553094189e-06, "loss": 17.3671, "step": 29225 }, { "epoch": 0.5342277953461165, "grad_norm": 7.3686976538510685, "learning_rate": 4.689074113903621e-06, "loss": 17.7183, "step": 29226 }, { "epoch": 0.5342460745425631, "grad_norm": 6.252171421143555, "learning_rate": 4.688778675802818e-06, "loss": 17.5999, "step": 29227 }, { "epoch": 0.5342643537390096, "grad_norm": 5.786479766304065, "learning_rate": 4.688483238792822e-06, "loss": 17.4693, "step": 29228 }, { "epoch": 0.5342826329354562, "grad_norm": 6.364261824826348, "learning_rate": 4.688187802874663e-06, "loss": 17.3688, "step": 29229 }, { "epoch": 0.5343009121319027, "grad_norm": 6.8465039000472165, "learning_rate": 4.6878923680493785e-06, "loss": 17.9025, "step": 29230 }, { "epoch": 0.5343191913283492, "grad_norm": 7.833967474762711, "learning_rate": 4.687596934318006e-06, "loss": 17.7981, "step": 29231 }, { "epoch": 0.5343374705247957, "grad_norm": 6.664933874321917, "learning_rate": 4.687301501681579e-06, "loss": 17.7476, "step": 29232 }, { "epoch": 0.5343557497212422, "grad_norm": 6.091551360699175, "learning_rate": 4.687006070141131e-06, "loss": 17.5468, "step": 29233 }, { "epoch": 0.5343740289176888, "grad_norm": 6.8923573712614585, "learning_rate": 4.686710639697701e-06, "loss": 17.692, "step": 29234 }, { "epoch": 0.5343923081141353, "grad_norm": 6.937381768672017, "learning_rate": 4.686415210352324e-06, "loss": 17.6177, "step": 29235 }, { "epoch": 0.5344105873105818, "grad_norm": 6.084408330732266, "learning_rate": 4.6861197821060315e-06, "loss": 17.4911, "step": 29236 }, { "epoch": 0.5344288665070284, "grad_norm": 7.44522130052418, "learning_rate": 4.685824354959865e-06, "loss": 17.7589, "step": 29237 }, { "epoch": 0.5344471457034748, "grad_norm": 4.454374701917882, "learning_rate": 4.685528928914855e-06, "loss": 16.7307, "step": 29238 }, { "epoch": 0.5344654248999214, "grad_norm": 6.715146535940895, "learning_rate": 4.685233503972039e-06, "loss": 17.6364, "step": 29239 }, { "epoch": 0.5344837040963679, "grad_norm": 7.0344452900829735, "learning_rate": 4.684938080132454e-06, "loss": 17.7868, "step": 29240 }, { "epoch": 0.5345019832928144, "grad_norm": 6.1756329523202345, "learning_rate": 4.684642657397132e-06, "loss": 17.5986, "step": 29241 }, { "epoch": 0.534520262489261, "grad_norm": 5.955811314738884, "learning_rate": 4.684347235767111e-06, "loss": 17.3416, "step": 29242 }, { "epoch": 0.5345385416857075, "grad_norm": 5.508464569377075, "learning_rate": 4.6840518152434245e-06, "loss": 17.2384, "step": 29243 }, { "epoch": 0.5345568208821541, "grad_norm": 6.255405359310729, "learning_rate": 4.6837563958271085e-06, "loss": 17.5851, "step": 29244 }, { "epoch": 0.5345751000786005, "grad_norm": 5.702873854884792, "learning_rate": 4.6834609775192e-06, "loss": 17.2959, "step": 29245 }, { "epoch": 0.534593379275047, "grad_norm": 5.607666762210169, "learning_rate": 4.683165560320735e-06, "loss": 17.1606, "step": 29246 }, { "epoch": 0.5346116584714936, "grad_norm": 5.07753553773357, "learning_rate": 4.682870144232744e-06, "loss": 17.0299, "step": 29247 }, { "epoch": 0.5346299376679401, "grad_norm": 5.812528319347235, "learning_rate": 4.682574729256266e-06, "loss": 17.3234, "step": 29248 }, { "epoch": 0.5346482168643867, "grad_norm": 5.261893376053922, "learning_rate": 4.682279315392339e-06, "loss": 17.1088, "step": 29249 }, { "epoch": 0.5346664960608332, "grad_norm": 8.080050959436333, "learning_rate": 4.681983902641992e-06, "loss": 17.9734, "step": 29250 }, { "epoch": 0.5346847752572796, "grad_norm": 6.2807987098320295, "learning_rate": 4.681688491006267e-06, "loss": 17.4539, "step": 29251 }, { "epoch": 0.5347030544537262, "grad_norm": 7.599171447651194, "learning_rate": 4.681393080486194e-06, "loss": 18.1529, "step": 29252 }, { "epoch": 0.5347213336501727, "grad_norm": 6.1447741984974575, "learning_rate": 4.68109767108281e-06, "loss": 17.4327, "step": 29253 }, { "epoch": 0.5347396128466193, "grad_norm": 6.389357331246572, "learning_rate": 4.680802262797153e-06, "loss": 17.5275, "step": 29254 }, { "epoch": 0.5347578920430658, "grad_norm": 5.745397360469146, "learning_rate": 4.6805068556302555e-06, "loss": 17.2144, "step": 29255 }, { "epoch": 0.5347761712395123, "grad_norm": 5.640009675799337, "learning_rate": 4.680211449583153e-06, "loss": 17.225, "step": 29256 }, { "epoch": 0.5347944504359589, "grad_norm": 6.701663513066303, "learning_rate": 4.679916044656883e-06, "loss": 17.5104, "step": 29257 }, { "epoch": 0.5348127296324053, "grad_norm": 6.758906431610483, "learning_rate": 4.679620640852477e-06, "loss": 17.8155, "step": 29258 }, { "epoch": 0.5348310088288519, "grad_norm": 5.804176758180699, "learning_rate": 4.679325238170977e-06, "loss": 17.1583, "step": 29259 }, { "epoch": 0.5348492880252984, "grad_norm": 6.456709927031515, "learning_rate": 4.679029836613411e-06, "loss": 17.827, "step": 29260 }, { "epoch": 0.5348675672217449, "grad_norm": 5.544919597973098, "learning_rate": 4.678734436180818e-06, "loss": 17.2689, "step": 29261 }, { "epoch": 0.5348858464181915, "grad_norm": 6.694230767897984, "learning_rate": 4.678439036874234e-06, "loss": 17.6911, "step": 29262 }, { "epoch": 0.534904125614638, "grad_norm": 7.383500252683421, "learning_rate": 4.6781436386946935e-06, "loss": 17.6856, "step": 29263 }, { "epoch": 0.5349224048110846, "grad_norm": 6.969245483848144, "learning_rate": 4.6778482416432295e-06, "loss": 17.3148, "step": 29264 }, { "epoch": 0.534940684007531, "grad_norm": 6.455067519609029, "learning_rate": 4.677552845720881e-06, "loss": 17.6421, "step": 29265 }, { "epoch": 0.5349589632039775, "grad_norm": 5.647349330468523, "learning_rate": 4.677257450928682e-06, "loss": 17.0891, "step": 29266 }, { "epoch": 0.5349772424004241, "grad_norm": 5.590856780988894, "learning_rate": 4.676962057267667e-06, "loss": 17.1274, "step": 29267 }, { "epoch": 0.5349955215968706, "grad_norm": 8.717455084846353, "learning_rate": 4.676666664738873e-06, "loss": 17.8971, "step": 29268 }, { "epoch": 0.5350138007933172, "grad_norm": 7.189686572730251, "learning_rate": 4.676371273343333e-06, "loss": 17.9889, "step": 29269 }, { "epoch": 0.5350320799897637, "grad_norm": 7.806460932605605, "learning_rate": 4.676075883082083e-06, "loss": 18.0796, "step": 29270 }, { "epoch": 0.5350503591862101, "grad_norm": 7.254536934137691, "learning_rate": 4.675780493956161e-06, "loss": 17.829, "step": 29271 }, { "epoch": 0.5350686383826567, "grad_norm": 8.41694626434146, "learning_rate": 4.6754851059666e-06, "loss": 18.4648, "step": 29272 }, { "epoch": 0.5350869175791032, "grad_norm": 5.5703289626038055, "learning_rate": 4.6751897191144335e-06, "loss": 17.2874, "step": 29273 }, { "epoch": 0.5351051967755498, "grad_norm": 5.408001157120569, "learning_rate": 4.6748943334007e-06, "loss": 17.1824, "step": 29274 }, { "epoch": 0.5351234759719963, "grad_norm": 5.930717778294631, "learning_rate": 4.674598948826432e-06, "loss": 16.822, "step": 29275 }, { "epoch": 0.5351417551684428, "grad_norm": 6.642122104741229, "learning_rate": 4.67430356539267e-06, "loss": 17.3118, "step": 29276 }, { "epoch": 0.5351600343648893, "grad_norm": 6.719707459775767, "learning_rate": 4.674008183100445e-06, "loss": 17.6993, "step": 29277 }, { "epoch": 0.5351783135613358, "grad_norm": 6.797813782800309, "learning_rate": 4.67371280195079e-06, "loss": 17.8031, "step": 29278 }, { "epoch": 0.5351965927577824, "grad_norm": 5.928483262475636, "learning_rate": 4.673417421944746e-06, "loss": 17.1513, "step": 29279 }, { "epoch": 0.5352148719542289, "grad_norm": 5.708362227727168, "learning_rate": 4.673122043083345e-06, "loss": 17.2476, "step": 29280 }, { "epoch": 0.5352331511506754, "grad_norm": 6.456114185780829, "learning_rate": 4.6728266653676225e-06, "loss": 17.3815, "step": 29281 }, { "epoch": 0.535251430347122, "grad_norm": 6.649650343554105, "learning_rate": 4.6725312887986154e-06, "loss": 17.2747, "step": 29282 }, { "epoch": 0.5352697095435685, "grad_norm": 5.574568616832653, "learning_rate": 4.672235913377357e-06, "loss": 17.35, "step": 29283 }, { "epoch": 0.535287988740015, "grad_norm": 5.443705554389813, "learning_rate": 4.671940539104881e-06, "loss": 17.3226, "step": 29284 }, { "epoch": 0.5353062679364615, "grad_norm": 5.73942444897739, "learning_rate": 4.6716451659822284e-06, "loss": 17.2158, "step": 29285 }, { "epoch": 0.535324547132908, "grad_norm": 6.801894375705081, "learning_rate": 4.67134979401043e-06, "loss": 17.5671, "step": 29286 }, { "epoch": 0.5353428263293546, "grad_norm": 6.290705725065143, "learning_rate": 4.671054423190521e-06, "loss": 17.4168, "step": 29287 }, { "epoch": 0.5353611055258011, "grad_norm": 5.743885114201012, "learning_rate": 4.670759053523538e-06, "loss": 17.2939, "step": 29288 }, { "epoch": 0.5353793847222477, "grad_norm": 5.7248347951083245, "learning_rate": 4.670463685010514e-06, "loss": 17.2367, "step": 29289 }, { "epoch": 0.5353976639186941, "grad_norm": 7.111293871021303, "learning_rate": 4.670168317652491e-06, "loss": 17.7536, "step": 29290 }, { "epoch": 0.5354159431151406, "grad_norm": 8.535309486135604, "learning_rate": 4.669872951450497e-06, "loss": 18.1195, "step": 29291 }, { "epoch": 0.5354342223115872, "grad_norm": 5.840101600532595, "learning_rate": 4.669577586405568e-06, "loss": 17.271, "step": 29292 }, { "epoch": 0.5354525015080337, "grad_norm": 6.973643897993268, "learning_rate": 4.669282222518743e-06, "loss": 17.6406, "step": 29293 }, { "epoch": 0.5354707807044802, "grad_norm": 6.14925160338331, "learning_rate": 4.6689868597910555e-06, "loss": 17.2322, "step": 29294 }, { "epoch": 0.5354890599009268, "grad_norm": 6.997981700307357, "learning_rate": 4.668691498223537e-06, "loss": 17.5883, "step": 29295 }, { "epoch": 0.5355073390973732, "grad_norm": 7.53567212091816, "learning_rate": 4.668396137817231e-06, "loss": 17.8659, "step": 29296 }, { "epoch": 0.5355256182938198, "grad_norm": 5.8336664721152065, "learning_rate": 4.668100778573164e-06, "loss": 17.2724, "step": 29297 }, { "epoch": 0.5355438974902663, "grad_norm": 5.951920364049022, "learning_rate": 4.667805420492375e-06, "loss": 17.2534, "step": 29298 }, { "epoch": 0.5355621766867128, "grad_norm": 5.349078049617566, "learning_rate": 4.6675100635759015e-06, "loss": 17.33, "step": 29299 }, { "epoch": 0.5355804558831594, "grad_norm": 5.181484811256063, "learning_rate": 4.667214707824776e-06, "loss": 16.9174, "step": 29300 }, { "epoch": 0.5355987350796059, "grad_norm": 8.621212699436843, "learning_rate": 4.6669193532400326e-06, "loss": 17.8028, "step": 29301 }, { "epoch": 0.5356170142760525, "grad_norm": 6.008305903796612, "learning_rate": 4.666623999822708e-06, "loss": 17.0339, "step": 29302 }, { "epoch": 0.5356352934724989, "grad_norm": 7.774715545069141, "learning_rate": 4.666328647573837e-06, "loss": 17.7492, "step": 29303 }, { "epoch": 0.5356535726689454, "grad_norm": 6.096256932202169, "learning_rate": 4.666033296494457e-06, "loss": 17.556, "step": 29304 }, { "epoch": 0.535671851865392, "grad_norm": 6.884931342989107, "learning_rate": 4.665737946585602e-06, "loss": 17.2523, "step": 29305 }, { "epoch": 0.5356901310618385, "grad_norm": 6.185112709333423, "learning_rate": 4.665442597848303e-06, "loss": 17.6004, "step": 29306 }, { "epoch": 0.5357084102582851, "grad_norm": 7.236608712566107, "learning_rate": 4.665147250283601e-06, "loss": 17.9588, "step": 29307 }, { "epoch": 0.5357266894547316, "grad_norm": 5.671268378623509, "learning_rate": 4.6648519038925285e-06, "loss": 17.0656, "step": 29308 }, { "epoch": 0.535744968651178, "grad_norm": 8.802022106221187, "learning_rate": 4.66455655867612e-06, "loss": 17.9557, "step": 29309 }, { "epoch": 0.5357632478476246, "grad_norm": 5.864360534832958, "learning_rate": 4.664261214635414e-06, "loss": 17.3295, "step": 29310 }, { "epoch": 0.5357815270440711, "grad_norm": 7.331905969731185, "learning_rate": 4.663965871771441e-06, "loss": 18.0535, "step": 29311 }, { "epoch": 0.5357998062405177, "grad_norm": 7.08138413712087, "learning_rate": 4.663670530085239e-06, "loss": 17.7963, "step": 29312 }, { "epoch": 0.5358180854369642, "grad_norm": 5.424243891684073, "learning_rate": 4.663375189577843e-06, "loss": 17.1795, "step": 29313 }, { "epoch": 0.5358363646334107, "grad_norm": 6.239865055155216, "learning_rate": 4.66307985025029e-06, "loss": 17.2509, "step": 29314 }, { "epoch": 0.5358546438298573, "grad_norm": 5.735952050430516, "learning_rate": 4.6627845121036084e-06, "loss": 17.1316, "step": 29315 }, { "epoch": 0.5358729230263037, "grad_norm": 5.565925724658268, "learning_rate": 4.66248917513884e-06, "loss": 17.1876, "step": 29316 }, { "epoch": 0.5358912022227503, "grad_norm": 6.106526816844541, "learning_rate": 4.662193839357019e-06, "loss": 17.2983, "step": 29317 }, { "epoch": 0.5359094814191968, "grad_norm": 6.100422188196055, "learning_rate": 4.661898504759176e-06, "loss": 17.3339, "step": 29318 }, { "epoch": 0.5359277606156433, "grad_norm": 9.067345685594086, "learning_rate": 4.661603171346352e-06, "loss": 18.2016, "step": 29319 }, { "epoch": 0.5359460398120899, "grad_norm": 5.759043408835573, "learning_rate": 4.661307839119579e-06, "loss": 17.36, "step": 29320 }, { "epoch": 0.5359643190085364, "grad_norm": 7.074325023844372, "learning_rate": 4.661012508079892e-06, "loss": 17.9006, "step": 29321 }, { "epoch": 0.535982598204983, "grad_norm": 4.682188078011207, "learning_rate": 4.660717178228328e-06, "loss": 16.8074, "step": 29322 }, { "epoch": 0.5360008774014294, "grad_norm": 5.932596754632673, "learning_rate": 4.660421849565919e-06, "loss": 17.3143, "step": 29323 }, { "epoch": 0.5360191565978759, "grad_norm": 5.032828189967548, "learning_rate": 4.660126522093704e-06, "loss": 17.1526, "step": 29324 }, { "epoch": 0.5360374357943225, "grad_norm": 5.933551125599635, "learning_rate": 4.659831195812716e-06, "loss": 17.634, "step": 29325 }, { "epoch": 0.536055714990769, "grad_norm": 5.250106738628413, "learning_rate": 4.659535870723988e-06, "loss": 16.9836, "step": 29326 }, { "epoch": 0.5360739941872156, "grad_norm": 7.230189637800085, "learning_rate": 4.65924054682856e-06, "loss": 17.9238, "step": 29327 }, { "epoch": 0.536092273383662, "grad_norm": 7.002282883504601, "learning_rate": 4.658945224127462e-06, "loss": 17.5911, "step": 29328 }, { "epoch": 0.5361105525801085, "grad_norm": 5.822483441629879, "learning_rate": 4.65864990262173e-06, "loss": 17.3025, "step": 29329 }, { "epoch": 0.5361288317765551, "grad_norm": 6.458043066053483, "learning_rate": 4.658354582312405e-06, "loss": 17.4127, "step": 29330 }, { "epoch": 0.5361471109730016, "grad_norm": 7.808095657410022, "learning_rate": 4.658059263200516e-06, "loss": 18.3442, "step": 29331 }, { "epoch": 0.5361653901694482, "grad_norm": 5.579122407499592, "learning_rate": 4.657763945287097e-06, "loss": 17.3155, "step": 29332 }, { "epoch": 0.5361836693658947, "grad_norm": 5.269806281522528, "learning_rate": 4.657468628573188e-06, "loss": 17.0414, "step": 29333 }, { "epoch": 0.5362019485623412, "grad_norm": 7.400005842810225, "learning_rate": 4.65717331305982e-06, "loss": 18.1298, "step": 29334 }, { "epoch": 0.5362202277587877, "grad_norm": 8.19494904366371, "learning_rate": 4.656877998748032e-06, "loss": 17.7911, "step": 29335 }, { "epoch": 0.5362385069552342, "grad_norm": 5.395202795013228, "learning_rate": 4.656582685638857e-06, "loss": 17.2497, "step": 29336 }, { "epoch": 0.5362567861516808, "grad_norm": 5.708600503897889, "learning_rate": 4.656287373733328e-06, "loss": 17.2469, "step": 29337 }, { "epoch": 0.5362750653481273, "grad_norm": 5.47260203117835, "learning_rate": 4.6559920630324825e-06, "loss": 17.0903, "step": 29338 }, { "epoch": 0.5362933445445738, "grad_norm": 6.778494362597934, "learning_rate": 4.655696753537356e-06, "loss": 17.8149, "step": 29339 }, { "epoch": 0.5363116237410204, "grad_norm": 7.044672217424278, "learning_rate": 4.655401445248981e-06, "loss": 17.8323, "step": 29340 }, { "epoch": 0.5363299029374669, "grad_norm": 5.916865508538785, "learning_rate": 4.655106138168395e-06, "loss": 17.3129, "step": 29341 }, { "epoch": 0.5363481821339134, "grad_norm": 6.2823091343334525, "learning_rate": 4.654810832296632e-06, "loss": 17.1717, "step": 29342 }, { "epoch": 0.5363664613303599, "grad_norm": 10.37997974822832, "learning_rate": 4.654515527634725e-06, "loss": 17.3777, "step": 29343 }, { "epoch": 0.5363847405268064, "grad_norm": 6.325962697313978, "learning_rate": 4.654220224183715e-06, "loss": 17.6281, "step": 29344 }, { "epoch": 0.536403019723253, "grad_norm": 6.839796889503244, "learning_rate": 4.653924921944631e-06, "loss": 17.6442, "step": 29345 }, { "epoch": 0.5364212989196995, "grad_norm": 5.720004386686231, "learning_rate": 4.653629620918509e-06, "loss": 17.0727, "step": 29346 }, { "epoch": 0.5364395781161461, "grad_norm": 7.202606278330624, "learning_rate": 4.653334321106387e-06, "loss": 17.9296, "step": 29347 }, { "epoch": 0.5364578573125925, "grad_norm": 5.618062835825139, "learning_rate": 4.653039022509298e-06, "loss": 17.0657, "step": 29348 }, { "epoch": 0.536476136509039, "grad_norm": 5.433691777755176, "learning_rate": 4.652743725128275e-06, "loss": 17.0539, "step": 29349 }, { "epoch": 0.5364944157054856, "grad_norm": 6.402264889054256, "learning_rate": 4.652448428964358e-06, "loss": 17.3862, "step": 29350 }, { "epoch": 0.5365126949019321, "grad_norm": 7.8667460832180724, "learning_rate": 4.652153134018577e-06, "loss": 18.1236, "step": 29351 }, { "epoch": 0.5365309740983787, "grad_norm": 6.508666560921784, "learning_rate": 4.651857840291969e-06, "loss": 17.5245, "step": 29352 }, { "epoch": 0.5365492532948252, "grad_norm": 7.228950229054558, "learning_rate": 4.651562547785571e-06, "loss": 18.0504, "step": 29353 }, { "epoch": 0.5365675324912716, "grad_norm": 6.948779196738871, "learning_rate": 4.651267256500413e-06, "loss": 17.7902, "step": 29354 }, { "epoch": 0.5365858116877182, "grad_norm": 5.746321231918124, "learning_rate": 4.650971966437537e-06, "loss": 17.2835, "step": 29355 }, { "epoch": 0.5366040908841647, "grad_norm": 6.279429001653878, "learning_rate": 4.650676677597972e-06, "loss": 17.4677, "step": 29356 }, { "epoch": 0.5366223700806113, "grad_norm": 7.049565430130494, "learning_rate": 4.650381389982754e-06, "loss": 17.4014, "step": 29357 }, { "epoch": 0.5366406492770578, "grad_norm": 6.241784604979106, "learning_rate": 4.6500861035929195e-06, "loss": 17.1587, "step": 29358 }, { "epoch": 0.5366589284735043, "grad_norm": 5.489016065724621, "learning_rate": 4.6497908184295044e-06, "loss": 17.1332, "step": 29359 }, { "epoch": 0.5366772076699509, "grad_norm": 6.850144707783792, "learning_rate": 4.649495534493539e-06, "loss": 18.0111, "step": 29360 }, { "epoch": 0.5366954868663973, "grad_norm": 6.81380763397933, "learning_rate": 4.649200251786063e-06, "loss": 17.415, "step": 29361 }, { "epoch": 0.5367137660628438, "grad_norm": 9.109713823789997, "learning_rate": 4.648904970308111e-06, "loss": 18.4835, "step": 29362 }, { "epoch": 0.5367320452592904, "grad_norm": 6.870680971815996, "learning_rate": 4.6486096900607145e-06, "loss": 17.4854, "step": 29363 }, { "epoch": 0.5367503244557369, "grad_norm": 7.654470596092475, "learning_rate": 4.648314411044912e-06, "loss": 17.8558, "step": 29364 }, { "epoch": 0.5367686036521835, "grad_norm": 6.540666871477112, "learning_rate": 4.648019133261735e-06, "loss": 17.4551, "step": 29365 }, { "epoch": 0.53678688284863, "grad_norm": 7.060905809863112, "learning_rate": 4.647723856712222e-06, "loss": 17.41, "step": 29366 }, { "epoch": 0.5368051620450764, "grad_norm": 6.528052422282426, "learning_rate": 4.6474285813974075e-06, "loss": 17.6751, "step": 29367 }, { "epoch": 0.536823441241523, "grad_norm": 5.822993964637325, "learning_rate": 4.647133307318322e-06, "loss": 17.2885, "step": 29368 }, { "epoch": 0.5368417204379695, "grad_norm": 7.361284339128492, "learning_rate": 4.646838034476007e-06, "loss": 17.6867, "step": 29369 }, { "epoch": 0.5368599996344161, "grad_norm": 8.702266070282723, "learning_rate": 4.646542762871492e-06, "loss": 18.0663, "step": 29370 }, { "epoch": 0.5368782788308626, "grad_norm": 7.680265628120438, "learning_rate": 4.646247492505813e-06, "loss": 17.5257, "step": 29371 }, { "epoch": 0.5368965580273091, "grad_norm": 6.756423681184469, "learning_rate": 4.645952223380009e-06, "loss": 17.6017, "step": 29372 }, { "epoch": 0.5369148372237557, "grad_norm": 5.657934085083988, "learning_rate": 4.645656955495111e-06, "loss": 17.3026, "step": 29373 }, { "epoch": 0.5369331164202021, "grad_norm": 5.797227153979704, "learning_rate": 4.645361688852153e-06, "loss": 17.2116, "step": 29374 }, { "epoch": 0.5369513956166487, "grad_norm": 7.409386454521562, "learning_rate": 4.645066423452171e-06, "loss": 17.8287, "step": 29375 }, { "epoch": 0.5369696748130952, "grad_norm": 6.785129146466672, "learning_rate": 4.644771159296203e-06, "loss": 17.5434, "step": 29376 }, { "epoch": 0.5369879540095417, "grad_norm": 7.335645828702823, "learning_rate": 4.644475896385278e-06, "loss": 17.5533, "step": 29377 }, { "epoch": 0.5370062332059883, "grad_norm": 5.843956419485704, "learning_rate": 4.644180634720437e-06, "loss": 17.1225, "step": 29378 }, { "epoch": 0.5370245124024348, "grad_norm": 5.8218643680485815, "learning_rate": 4.643885374302711e-06, "loss": 17.4456, "step": 29379 }, { "epoch": 0.5370427915988814, "grad_norm": 6.76200635501882, "learning_rate": 4.643590115133135e-06, "loss": 17.3823, "step": 29380 }, { "epoch": 0.5370610707953278, "grad_norm": 5.566491416400289, "learning_rate": 4.643294857212746e-06, "loss": 17.1918, "step": 29381 }, { "epoch": 0.5370793499917743, "grad_norm": 6.260268604229605, "learning_rate": 4.642999600542576e-06, "loss": 17.474, "step": 29382 }, { "epoch": 0.5370976291882209, "grad_norm": 6.0218129939675835, "learning_rate": 4.642704345123664e-06, "loss": 17.4573, "step": 29383 }, { "epoch": 0.5371159083846674, "grad_norm": 6.530548656154416, "learning_rate": 4.64240909095704e-06, "loss": 17.5706, "step": 29384 }, { "epoch": 0.537134187581114, "grad_norm": 6.071058171613956, "learning_rate": 4.642113838043741e-06, "loss": 17.3123, "step": 29385 }, { "epoch": 0.5371524667775605, "grad_norm": 5.5482326896115595, "learning_rate": 4.641818586384803e-06, "loss": 17.1153, "step": 29386 }, { "epoch": 0.5371707459740069, "grad_norm": 6.211957718250558, "learning_rate": 4.64152333598126e-06, "loss": 17.2345, "step": 29387 }, { "epoch": 0.5371890251704535, "grad_norm": 5.699175348587522, "learning_rate": 4.641228086834146e-06, "loss": 17.0164, "step": 29388 }, { "epoch": 0.5372073043669, "grad_norm": 6.010587503966536, "learning_rate": 4.640932838944497e-06, "loss": 17.2308, "step": 29389 }, { "epoch": 0.5372255835633466, "grad_norm": 6.244570599895282, "learning_rate": 4.640637592313347e-06, "loss": 17.351, "step": 29390 }, { "epoch": 0.5372438627597931, "grad_norm": 6.78333587828515, "learning_rate": 4.64034234694173e-06, "loss": 17.6876, "step": 29391 }, { "epoch": 0.5372621419562396, "grad_norm": 6.678429894941688, "learning_rate": 4.640047102830683e-06, "loss": 17.5395, "step": 29392 }, { "epoch": 0.5372804211526861, "grad_norm": 7.160387268459865, "learning_rate": 4.63975185998124e-06, "loss": 17.7461, "step": 29393 }, { "epoch": 0.5372987003491326, "grad_norm": 6.526628635386126, "learning_rate": 4.639456618394434e-06, "loss": 17.4234, "step": 29394 }, { "epoch": 0.5373169795455792, "grad_norm": 5.693856248389524, "learning_rate": 4.639161378071303e-06, "loss": 17.178, "step": 29395 }, { "epoch": 0.5373352587420257, "grad_norm": 6.264029564666308, "learning_rate": 4.638866139012879e-06, "loss": 17.2177, "step": 29396 }, { "epoch": 0.5373535379384722, "grad_norm": 5.949745882098424, "learning_rate": 4.638570901220197e-06, "loss": 17.4326, "step": 29397 }, { "epoch": 0.5373718171349188, "grad_norm": 8.495532980691907, "learning_rate": 4.638275664694295e-06, "loss": 17.5507, "step": 29398 }, { "epoch": 0.5373900963313653, "grad_norm": 8.76437569547379, "learning_rate": 4.637980429436203e-06, "loss": 18.227, "step": 29399 }, { "epoch": 0.5374083755278118, "grad_norm": 5.973602951977648, "learning_rate": 4.63768519544696e-06, "loss": 17.5578, "step": 29400 }, { "epoch": 0.5374266547242583, "grad_norm": 4.959636867896732, "learning_rate": 4.637389962727598e-06, "loss": 16.8575, "step": 29401 }, { "epoch": 0.5374449339207048, "grad_norm": 6.858806662125633, "learning_rate": 4.637094731279152e-06, "loss": 18.0463, "step": 29402 }, { "epoch": 0.5374632131171514, "grad_norm": 5.708286998701389, "learning_rate": 4.6367995011026596e-06, "loss": 17.1434, "step": 29403 }, { "epoch": 0.5374814923135979, "grad_norm": 6.168848984205421, "learning_rate": 4.636504272199153e-06, "loss": 17.3139, "step": 29404 }, { "epoch": 0.5374997715100445, "grad_norm": 5.401303803194577, "learning_rate": 4.636209044569665e-06, "loss": 17.1362, "step": 29405 }, { "epoch": 0.537518050706491, "grad_norm": 7.160912187803219, "learning_rate": 4.635913818215234e-06, "loss": 17.6565, "step": 29406 }, { "epoch": 0.5375363299029374, "grad_norm": 6.024653480766796, "learning_rate": 4.635618593136896e-06, "loss": 17.083, "step": 29407 }, { "epoch": 0.537554609099384, "grad_norm": 6.465684858822787, "learning_rate": 4.63532336933568e-06, "loss": 17.5774, "step": 29408 }, { "epoch": 0.5375728882958305, "grad_norm": 6.705385820450828, "learning_rate": 4.6350281468126255e-06, "loss": 17.3118, "step": 29409 }, { "epoch": 0.5375911674922771, "grad_norm": 6.960883044363678, "learning_rate": 4.6347329255687654e-06, "loss": 17.7671, "step": 29410 }, { "epoch": 0.5376094466887236, "grad_norm": 5.949739040229357, "learning_rate": 4.634437705605133e-06, "loss": 17.1729, "step": 29411 }, { "epoch": 0.53762772588517, "grad_norm": 8.008650712910079, "learning_rate": 4.634142486922767e-06, "loss": 18.3337, "step": 29412 }, { "epoch": 0.5376460050816166, "grad_norm": 6.381688136674593, "learning_rate": 4.633847269522698e-06, "loss": 17.7893, "step": 29413 }, { "epoch": 0.5376642842780631, "grad_norm": 6.129539403024211, "learning_rate": 4.633552053405965e-06, "loss": 17.2959, "step": 29414 }, { "epoch": 0.5376825634745097, "grad_norm": 6.8010730349317345, "learning_rate": 4.6332568385735985e-06, "loss": 17.4192, "step": 29415 }, { "epoch": 0.5377008426709562, "grad_norm": 7.749552261047914, "learning_rate": 4.632961625026634e-06, "loss": 17.8023, "step": 29416 }, { "epoch": 0.5377191218674027, "grad_norm": 7.227173298445376, "learning_rate": 4.632666412766109e-06, "loss": 17.5013, "step": 29417 }, { "epoch": 0.5377374010638493, "grad_norm": 6.589930903699898, "learning_rate": 4.632371201793057e-06, "loss": 18.0213, "step": 29418 }, { "epoch": 0.5377556802602957, "grad_norm": 7.1108711693286955, "learning_rate": 4.63207599210851e-06, "loss": 17.7495, "step": 29419 }, { "epoch": 0.5377739594567423, "grad_norm": 5.756677243444879, "learning_rate": 4.6317807837135055e-06, "loss": 17.1539, "step": 29420 }, { "epoch": 0.5377922386531888, "grad_norm": 5.851982032084623, "learning_rate": 4.631485576609078e-06, "loss": 17.285, "step": 29421 }, { "epoch": 0.5378105178496353, "grad_norm": 5.69020733797831, "learning_rate": 4.63119037079626e-06, "loss": 17.2473, "step": 29422 }, { "epoch": 0.5378287970460819, "grad_norm": 6.562588678630628, "learning_rate": 4.63089516627609e-06, "loss": 17.6663, "step": 29423 }, { "epoch": 0.5378470762425284, "grad_norm": 5.940777519911705, "learning_rate": 4.630599963049599e-06, "loss": 17.0966, "step": 29424 }, { "epoch": 0.537865355438975, "grad_norm": 5.076806333792547, "learning_rate": 4.630304761117822e-06, "loss": 17.0302, "step": 29425 }, { "epoch": 0.5378836346354214, "grad_norm": 5.255430406037884, "learning_rate": 4.630009560481797e-06, "loss": 17.2251, "step": 29426 }, { "epoch": 0.5379019138318679, "grad_norm": 7.216822730025074, "learning_rate": 4.629714361142555e-06, "loss": 17.9002, "step": 29427 }, { "epoch": 0.5379201930283145, "grad_norm": 6.092894704779971, "learning_rate": 4.629419163101133e-06, "loss": 17.4818, "step": 29428 }, { "epoch": 0.537938472224761, "grad_norm": 5.758073355618983, "learning_rate": 4.629123966358564e-06, "loss": 17.1897, "step": 29429 }, { "epoch": 0.5379567514212075, "grad_norm": 7.515799397134103, "learning_rate": 4.628828770915882e-06, "loss": 17.6309, "step": 29430 }, { "epoch": 0.5379750306176541, "grad_norm": 6.877101783486477, "learning_rate": 4.628533576774125e-06, "loss": 17.9767, "step": 29431 }, { "epoch": 0.5379933098141005, "grad_norm": 5.612593666034816, "learning_rate": 4.628238383934326e-06, "loss": 17.1924, "step": 29432 }, { "epoch": 0.5380115890105471, "grad_norm": 6.022177368941253, "learning_rate": 4.627943192397517e-06, "loss": 17.4696, "step": 29433 }, { "epoch": 0.5380298682069936, "grad_norm": 6.01082937675309, "learning_rate": 4.627648002164736e-06, "loss": 17.101, "step": 29434 }, { "epoch": 0.5380481474034401, "grad_norm": 6.908720313695068, "learning_rate": 4.627352813237017e-06, "loss": 17.7684, "step": 29435 }, { "epoch": 0.5380664265998867, "grad_norm": 6.934726264491708, "learning_rate": 4.627057625615392e-06, "loss": 17.4942, "step": 29436 }, { "epoch": 0.5380847057963332, "grad_norm": 6.767730609440472, "learning_rate": 4.6267624393009e-06, "loss": 17.6882, "step": 29437 }, { "epoch": 0.5381029849927798, "grad_norm": 5.363970931806698, "learning_rate": 4.626467254294572e-06, "loss": 17.1753, "step": 29438 }, { "epoch": 0.5381212641892262, "grad_norm": 5.659846540324078, "learning_rate": 4.626172070597444e-06, "loss": 17.3222, "step": 29439 }, { "epoch": 0.5381395433856727, "grad_norm": 6.044542077164771, "learning_rate": 4.625876888210551e-06, "loss": 17.1571, "step": 29440 }, { "epoch": 0.5381578225821193, "grad_norm": 6.570344416048931, "learning_rate": 4.625581707134928e-06, "loss": 17.7369, "step": 29441 }, { "epoch": 0.5381761017785658, "grad_norm": 6.120390875854275, "learning_rate": 4.625286527371606e-06, "loss": 17.0608, "step": 29442 }, { "epoch": 0.5381943809750124, "grad_norm": 6.09306400427329, "learning_rate": 4.6249913489216245e-06, "loss": 17.3157, "step": 29443 }, { "epoch": 0.5382126601714589, "grad_norm": 6.790203034486313, "learning_rate": 4.624696171786012e-06, "loss": 17.7011, "step": 29444 }, { "epoch": 0.5382309393679053, "grad_norm": 7.639083428217476, "learning_rate": 4.624400995965812e-06, "loss": 17.3712, "step": 29445 }, { "epoch": 0.5382492185643519, "grad_norm": 7.516845162250205, "learning_rate": 4.6241058214620516e-06, "loss": 17.8654, "step": 29446 }, { "epoch": 0.5382674977607984, "grad_norm": 7.301650308646706, "learning_rate": 4.623810648275767e-06, "loss": 17.4821, "step": 29447 }, { "epoch": 0.538285776957245, "grad_norm": 7.0783371920980525, "learning_rate": 4.623515476407994e-06, "loss": 17.8513, "step": 29448 }, { "epoch": 0.5383040561536915, "grad_norm": 6.989071061146871, "learning_rate": 4.6232203058597676e-06, "loss": 17.5442, "step": 29449 }, { "epoch": 0.538322335350138, "grad_norm": 6.740616738139947, "learning_rate": 4.6229251366321195e-06, "loss": 17.6039, "step": 29450 }, { "epoch": 0.5383406145465846, "grad_norm": 6.055411461890058, "learning_rate": 4.622629968726087e-06, "loss": 17.2912, "step": 29451 }, { "epoch": 0.538358893743031, "grad_norm": 5.006914991409046, "learning_rate": 4.622334802142705e-06, "loss": 16.853, "step": 29452 }, { "epoch": 0.5383771729394776, "grad_norm": 5.945411939113464, "learning_rate": 4.622039636883004e-06, "loss": 17.2922, "step": 29453 }, { "epoch": 0.5383954521359241, "grad_norm": 6.997866624696268, "learning_rate": 4.6217444729480235e-06, "loss": 17.3213, "step": 29454 }, { "epoch": 0.5384137313323706, "grad_norm": 4.842755316183787, "learning_rate": 4.621449310338795e-06, "loss": 16.904, "step": 29455 }, { "epoch": 0.5384320105288172, "grad_norm": 6.44555934151634, "learning_rate": 4.621154149056353e-06, "loss": 17.318, "step": 29456 }, { "epoch": 0.5384502897252637, "grad_norm": 5.63938653182184, "learning_rate": 4.620858989101735e-06, "loss": 17.2163, "step": 29457 }, { "epoch": 0.5384685689217102, "grad_norm": 6.847603108257481, "learning_rate": 4.62056383047597e-06, "loss": 17.8226, "step": 29458 }, { "epoch": 0.5384868481181567, "grad_norm": 6.495474202642668, "learning_rate": 4.620268673180099e-06, "loss": 17.6462, "step": 29459 }, { "epoch": 0.5385051273146032, "grad_norm": 6.36912065411912, "learning_rate": 4.6199735172151525e-06, "loss": 17.4788, "step": 29460 }, { "epoch": 0.5385234065110498, "grad_norm": 7.569506157647184, "learning_rate": 4.619678362582164e-06, "loss": 17.3841, "step": 29461 }, { "epoch": 0.5385416857074963, "grad_norm": 6.13074048656206, "learning_rate": 4.619383209282173e-06, "loss": 17.304, "step": 29462 }, { "epoch": 0.5385599649039429, "grad_norm": 5.6353209803185536, "learning_rate": 4.61908805731621e-06, "loss": 17.044, "step": 29463 }, { "epoch": 0.5385782441003893, "grad_norm": 6.529993248223105, "learning_rate": 4.618792906685308e-06, "loss": 17.5979, "step": 29464 }, { "epoch": 0.5385965232968358, "grad_norm": 5.907159939389777, "learning_rate": 4.618497757390505e-06, "loss": 17.3092, "step": 29465 }, { "epoch": 0.5386148024932824, "grad_norm": 9.227590186808916, "learning_rate": 4.618202609432836e-06, "loss": 18.4866, "step": 29466 }, { "epoch": 0.5386330816897289, "grad_norm": 5.495784479179967, "learning_rate": 4.617907462813332e-06, "loss": 17.33, "step": 29467 }, { "epoch": 0.5386513608861755, "grad_norm": 6.004887540725572, "learning_rate": 4.6176123175330295e-06, "loss": 17.0819, "step": 29468 }, { "epoch": 0.538669640082622, "grad_norm": 6.377693620330323, "learning_rate": 4.617317173592963e-06, "loss": 17.5609, "step": 29469 }, { "epoch": 0.5386879192790685, "grad_norm": 7.296511044930412, "learning_rate": 4.617022030994165e-06, "loss": 17.7308, "step": 29470 }, { "epoch": 0.538706198475515, "grad_norm": 6.2670869074828355, "learning_rate": 4.616726889737673e-06, "loss": 17.5962, "step": 29471 }, { "epoch": 0.5387244776719615, "grad_norm": 6.273506710374839, "learning_rate": 4.616431749824519e-06, "loss": 17.3744, "step": 29472 }, { "epoch": 0.5387427568684081, "grad_norm": 8.194156052425928, "learning_rate": 4.61613661125574e-06, "loss": 18.4392, "step": 29473 }, { "epoch": 0.5387610360648546, "grad_norm": 5.372760051340595, "learning_rate": 4.615841474032368e-06, "loss": 17.0274, "step": 29474 }, { "epoch": 0.5387793152613011, "grad_norm": 7.082151687429085, "learning_rate": 4.615546338155438e-06, "loss": 17.681, "step": 29475 }, { "epoch": 0.5387975944577477, "grad_norm": 6.496208767191889, "learning_rate": 4.615251203625986e-06, "loss": 17.3553, "step": 29476 }, { "epoch": 0.5388158736541941, "grad_norm": 6.762368717166169, "learning_rate": 4.614956070445045e-06, "loss": 17.6863, "step": 29477 }, { "epoch": 0.5388341528506407, "grad_norm": 5.503882816305648, "learning_rate": 4.614660938613648e-06, "loss": 17.2541, "step": 29478 }, { "epoch": 0.5388524320470872, "grad_norm": 6.676343953056528, "learning_rate": 4.614365808132832e-06, "loss": 17.4198, "step": 29479 }, { "epoch": 0.5388707112435337, "grad_norm": 7.722256220464426, "learning_rate": 4.61407067900363e-06, "loss": 18.0793, "step": 29480 }, { "epoch": 0.5388889904399803, "grad_norm": 5.1380679544098715, "learning_rate": 4.613775551227076e-06, "loss": 16.9727, "step": 29481 }, { "epoch": 0.5389072696364268, "grad_norm": 6.9937525316139135, "learning_rate": 4.613480424804207e-06, "loss": 17.459, "step": 29482 }, { "epoch": 0.5389255488328734, "grad_norm": 6.575303936009218, "learning_rate": 4.613185299736055e-06, "loss": 17.5852, "step": 29483 }, { "epoch": 0.5389438280293198, "grad_norm": 6.0547981093976695, "learning_rate": 4.612890176023654e-06, "loss": 17.2239, "step": 29484 }, { "epoch": 0.5389621072257663, "grad_norm": 6.979537515178285, "learning_rate": 4.61259505366804e-06, "loss": 17.7887, "step": 29485 }, { "epoch": 0.5389803864222129, "grad_norm": 5.574405040408401, "learning_rate": 4.612299932670248e-06, "loss": 17.0227, "step": 29486 }, { "epoch": 0.5389986656186594, "grad_norm": 5.957891024767679, "learning_rate": 4.6120048130313085e-06, "loss": 17.4767, "step": 29487 }, { "epoch": 0.539016944815106, "grad_norm": 6.508454385681888, "learning_rate": 4.611709694752259e-06, "loss": 17.4061, "step": 29488 }, { "epoch": 0.5390352240115525, "grad_norm": 7.952240112241982, "learning_rate": 4.611414577834134e-06, "loss": 18.545, "step": 29489 }, { "epoch": 0.5390535032079989, "grad_norm": 6.074578112519142, "learning_rate": 4.611119462277968e-06, "loss": 17.3847, "step": 29490 }, { "epoch": 0.5390717824044455, "grad_norm": 7.042071628695794, "learning_rate": 4.6108243480847945e-06, "loss": 17.6878, "step": 29491 }, { "epoch": 0.539090061600892, "grad_norm": 7.742850166745321, "learning_rate": 4.610529235255646e-06, "loss": 17.825, "step": 29492 }, { "epoch": 0.5391083407973386, "grad_norm": 7.16135930848796, "learning_rate": 4.61023412379156e-06, "loss": 17.7906, "step": 29493 }, { "epoch": 0.5391266199937851, "grad_norm": 6.872807704777655, "learning_rate": 4.60993901369357e-06, "loss": 17.2553, "step": 29494 }, { "epoch": 0.5391448991902316, "grad_norm": 7.036621368448833, "learning_rate": 4.609643904962709e-06, "loss": 17.7991, "step": 29495 }, { "epoch": 0.5391631783866782, "grad_norm": 6.140199390986187, "learning_rate": 4.609348797600013e-06, "loss": 17.288, "step": 29496 }, { "epoch": 0.5391814575831246, "grad_norm": 5.393724906218478, "learning_rate": 4.609053691606516e-06, "loss": 17.3026, "step": 29497 }, { "epoch": 0.5391997367795711, "grad_norm": 6.618458585201584, "learning_rate": 4.608758586983249e-06, "loss": 17.4113, "step": 29498 }, { "epoch": 0.5392180159760177, "grad_norm": 6.38221382609543, "learning_rate": 4.608463483731253e-06, "loss": 17.3821, "step": 29499 }, { "epoch": 0.5392362951724642, "grad_norm": 6.160402299489039, "learning_rate": 4.6081683818515575e-06, "loss": 17.4905, "step": 29500 } ], "logging_steps": 1.0, "max_steps": 54707, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }